From c122f6082fec6f32ab2332dc99d9d5c37288d1ef Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Thu, 6 Dec 2018 17:27:10 +1000 Subject: [PATCH 1/2] Allow a very small build based on SHA-256 and RSA verify --- configure.ac | 409 +++-- wolfcrypt/benchmark/benchmark.c | 57 +- wolfcrypt/src/misc.c | 18 +- wolfcrypt/src/rsa.c | 93 +- wolfcrypt/src/signature.c | 7 +- wolfcrypt/src/sp_arm32.c | 2880 ++++++++++++++++--------------- wolfcrypt/src/sp_arm64.c | 1632 +++++++++--------- wolfcrypt/src/sp_armthumb.c | 326 ++-- wolfcrypt/src/sp_c32.c | 549 ++---- wolfcrypt/src/sp_c64.c | 278 +-- wolfcrypt/src/sp_int.c | 19 +- wolfcrypt/test/test.c | 83 +- wolfssl/wolfcrypt/rsa.h | 7 +- wolfssl/wolfcrypt/settings.h | 37 + 14 files changed, 3282 insertions(+), 3113 deletions(-) diff --git a/configure.ac b/configure.ac index cc9747c2e..46e612813 100644 --- a/configure.ac +++ b/configure.ac @@ -1770,6 +1770,48 @@ else fi fi +AC_ARG_ENABLE([oaep], + [AS_HELP_STRING([--enable-oaep],[Enable RSA OAEP (default: enabled)])], + [ ENABLED_OAEP=$enableval ], + [ ENABLED_OAEP=yes ] + ) + +if test "$ENABLED_OAEP" = "no" +then + AM_CFLAGS="$AM_CFLAGS -DWC_NO_RSA_OAEP" +fi + +AC_ARG_ENABLE([rsapub], + [AS_HELP_STRING([--enable-rsapub],[Enable RSA Public Only (default: disabled)])], + [ ENABLED_RSAPUB=$enableval ], + [ ENABLED_RSAPUB=no ] + ) + +if test "$ENABLED_RSAPUB" = "yes" +then + if test "$ENABLED_RSA" = "no" + then + ENABLED_RSA="yes" + fi + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RSA_PUBLIC_ONLY" +fi + +AC_ARG_ENABLE([rsavfy], + [AS_HELP_STRING([--enable-rsavfy],[Enable RSA Verify Inline Only (default: disabled)])], + [ ENABLED_RSAVFY=$enableval ], + [ ENABLED_RSAVFY=no ] + ) + +if test "$ENABLED_RSAVFY" = "yes" +then + if test "$ENABLED_RSA" = "no" + then + ENABLED_RSA="yes" + fi + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RSA_PUBLIC_ONLY -DWOLFSSL_RSA_VERIFY_ONLY" + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RSA_VERIFY_INLINE -DNO_SIG_WRAPPER" +fi + AM_CONDITIONAL([BUILD_RSA], [test "x$ENABLED_RSA" = "xyes"]) @@ 
-1880,7 +1922,7 @@ else fi fi -if test "$ENABLED_RSA" = "yes" && test "$ENABLED_ASN" = "no" +if test "$ENABLED_RSA" = "yes" && test "$ENABLED_RSAVFY" = "no" && test "$ENABLED_ASN" = "no" then AC_MSG_ERROR([please disable rsa if disabling asn.]) fi @@ -1891,7 +1933,7 @@ then fi # DH and ECC need bigint -if test "$ENABLED_ASN" = "no" && test "$ENABLED_DH" = "no" && test "$ENABLED_ECC" = "no" +if test "$ENABLED_ASN" = "no" && test "$ENABLED_DH" = "no" && test "$ENABLED_ECC" = "no" && test "$ENABLED_RSA" = "no" then ENABLED_FASTMATH=no ENABLED_SLOWMATH=no @@ -3466,6 +3508,184 @@ then fi fi +# Single Precision maths implementation +AC_ARG_ENABLE([sp], + [AS_HELP_STRING([--enable-sp],[Enable Single Precision maths implementation (default: disabled)])], + [ ENABLED_SP=$enableval ], + [ ENABLED_SP=no ], + ) + +ENABLED_SP_RSA=no +ENABLED_SP_DH=no +ENABLED_SP_ECC=no +for v in `echo $ENABLED_SP | tr "," " "` +do + case $v in + small) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" + ENABLED_SP_RSA=yes + ENABLED_SP_DH=yes + ENABLED_SP_ECC=yes + ;; + yes) + ENABLED_SP_RSA=yes + ENABLED_SP_DH=yes + ENABLED_SP_ECC=yes + ;; + no) + ;; + + smallec256 | smallp256 | small256) + ENABLED_SP_ECC=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" + ;; + ec256 | p256 | 256) + ENABLED_SP_ECC=yes + ;; + + small2048) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" + ENABLED_SP_RSA=yes + ENABLED_SP_DH=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_3072" + ;; + 2048) + ENABLED_SP_RSA=yes + ENABLED_SP_DH=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_3072" + ;; + + smallrsa2048) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" + ENABLED_SP_RSA=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_3072" + ;; + rsa2048) + ENABLED_SP_RSA=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_3072" + ;; + + small3072) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" + ENABLED_SP_RSA=yes + ENABLED_SP_DH=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_2048" + ;; + 3072) + ENABLED_SP_RSA=yes + ENABLED_SP_DH=yes + AM_CFLAGS="$AM_CFLAGS 
-DWOLFSSL_SP_NO_2048" + ;; + + smallrsa3072) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" + ENABLED_SP_RSA=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_2048" + ;; + rsa3072) + ENABLED_SP_RSA=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_2048" + ;; + + *) + AC_MSG_ERROR([Invalid choice of Single Precision length in bits [256, 2048, 3072]: $ENABLED_SP.]) + break;; + esac +done + +ENABLED_SP=no +if test "$ENABLED_RSA" = "yes" && test "$ENABLED_SP_RSA" = "yes"; then + ENABLED_SP=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HAVE_SP_RSA" +fi +if test "$ENABLED_DH" = "yes" && test "$ENABLED_SP_DH" = "yes"; then + ENABLED_SP=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HAVE_SP_DH" +fi +if test "$ENABLED_ECC" = "yes" && test "$ENABLED_SP_ECC" = "yes"; then + ENABLED_SP=yes + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HAVE_SP_ECC" +fi + + +AC_ARG_ENABLE([sp-asm], + [AS_HELP_STRING([--enable-sp-asm],[Enable Single Precision assembly implementation (default: disabled)])], + [ ENABLED_SP_ASM=$enableval ], + [ ENABLED_SP_ASM=no ], + ) +if test "$ENABLED_SP_ASM" = "yes"; then + if test "$ENABLED_SP" = "no"; then + AC_MSG_ERROR([Must have SP enabled: --enable-sp]) + fi + if test "$ENABLED_ASM" = "no"; then + AC_MSG_ERROR([Assembly code turned off]) + fi + + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ASM" + case $host_cpu in + *aarch64*) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM64_ASM" + ENABLED_SP_ARM64_ASM=yes + ;; + *arm*) + if test $host_alias = "thumb"; then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM -mthumb -march=armv6" + ENABLED_SP_ARM_THUMB_ASM=yes + else + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM" + ENABLED_SP_ARM32_ASM=yes + fi + ;; + *x86_64*) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_X86_64_ASM" + ENABLED_SP_X86_64_ASM=yes + ;; + *) + AC_MSG_ERROR([ASM not available for CPU. 
Supported CPUs: x86_64, aarch64, arm]) + ;; + esac +fi + +AC_ARG_ENABLE([sp-math], + [AS_HELP_STRING([--enable-sp-math],[Enable Single Precision math implementation only (default: disabled)])], + [ ENABLED_SP_MATH=$enableval ], + [ ENABLED_SP_MATH=no ], + ) +if test "$ENABLED_SP_MATH" = "yes"; then + if test "$ENABLED_SP" = "no"; then + AC_MSG_ERROR([Must have SP enabled: --enable-sp]) + fi + if test "$ENABLED_ECCCUSTCURVES" = "yes"; then + AC_MSG_ERROR([Cannot use single precision math and custom curves]) + fi + if test "$ENABLED_OPENSSLEXTRA" = "yes"; then + AC_MSG_ERROR([Cannot use single precision math and OpenSSL extra]) + fi + if test "$ENABLED_DSA" = "yes"; then + AC_MSG_ERROR([Cannot use single precision math and DSA]) + fi + if test "$ENABLED_SRP" = "yes"; then + AC_MSG_ERROR([Cannot use single precision math and SRP]) + fi + if test "$ENABLED_SP_RSA" = "no" && test "$ENABLED_RSA" = "yes"; then + AC_MSG_ERROR([Cannot use Single Precision maths without RSA with RSA]) + fi + if test "$ENABLED_SP_DH" = "no" && test "$ENABLED_DH" = "yes"; then + AC_MSG_ERROR([Cannot use Single Precision maths without DH with DH]) + fi +fi +if test "$ENABLED_SP_MATH" = "yes"; then + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_MATH" +fi + +AM_CONDITIONAL([BUILD_SP], [test "x$ENABLED_SP" = "xyes"]) +AM_CONDITIONAL([BUILD_SP_C], [test "x$ENABLED_SP" = "xyes" && test "x$ENABLED_SP_ASM" = "xno" ]) +AM_CONDITIONAL([BUILD_SP_ARM64], [test "x$ENABLED_SP_ARM64_ASM" = "xyes" ]) +AM_CONDITIONAL([BUILD_SP_ARM32], [test "x$ENABLED_SP_ARM32_ASM" = "xyes" ]) +AM_CONDITIONAL([BUILD_SP_ARM_THUMB], [test "x$ENABLED_SP_ARM_THUMB_ASM" = "xyes" ]) +AM_CONDITIONAL([BUILD_SP_X86_64], [test "x$ENABLED_SP_X86_64_ASM" = "xyes" ]) +AM_CONDITIONAL([BUILD_SP_INT], [test "x$ENABLED_SP_MATH" = "xyes" ]) + # set fastmath default FASTMATH_DEFAULT=no @@ -3474,6 +3694,10 @@ if test "$host_cpu" = "x86_64" || test "$host_cpu" = "aarch64" then FASTMATH_DEFAULT=yes fi +if test "$ENABLED_SP_MATH" = "yes" +then + 
FASTMATH_DEFAULT=no +fi # fastmath AC_ARG_ENABLE([fastmath], @@ -3487,7 +3711,7 @@ then # turn off fastmth if leanpsk on or asn off (w/o DH and ECC) if test "$ENABLED_LEANPSK" = "yes" || test "$ENABLED_ASN" = "no" then - if test "$ENABLED_DH" = "no" && test "$ENABLED_ECC" = "no" + if test "$ENABLED_DH" = "no" && test "$ENABLED_ECC" = "no" && test "$ENABLED_RSA" = "no" then ENABLED_FASTMATH=no else @@ -3740,185 +3964,6 @@ AC_ARG_WITH([intelqa], ) AM_CONDITIONAL([BUILD_INTEL_QA], [test "x$ENABLED_INTEL_QA" = "xyes"]) - -# Single Precision maths implementation -AC_ARG_ENABLE([sp], - [AS_HELP_STRING([--enable-sp],[Enable Single Precision maths implementation (default: disabled)])], - [ ENABLED_SP=$enableval ], - [ ENABLED_SP=no ], - ) - -ENABLED_SP_RSA=no -ENABLED_SP_DH=no -ENABLED_SP_ECC=no -for v in `echo $ENABLED_SP | tr "," " "` -do - case $v in - small) - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" - ENABLED_SP_RSA=yes - ENABLED_SP_DH=yes - ENABLED_SP_ECC=yes - ;; - yes) - ENABLED_SP_RSA=yes - ENABLED_SP_DH=yes - ENABLED_SP_ECC=yes - ;; - no) - ;; - - smallec256 | smallp256 | small256) - ENABLED_SP_ECC=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" - ;; - ec256 | p256 | 256) - ENABLED_SP_ECC=yes - ;; - - small2048) - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" - ENABLED_SP_RSA=yes - ENABLED_SP_DH=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_3072" - ;; - 2048) - ENABLED_SP_RSA=yes - ENABLED_SP_DH=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_3072" - ;; - - smallrsa2048) - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" - ENABLED_SP_RSA=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_3072" - ;; - rsa2048) - ENABLED_SP_RSA=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_3072" - ;; - - small3072) - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_SMALL" - ENABLED_SP_RSA=yes - ENABLED_SP_DH=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_2048" - ;; - 3072) - ENABLED_SP_RSA=yes - ENABLED_SP_DH=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_2048" - ;; - - smallrsa3072) - AM_CFLAGS="$AM_CFLAGS 
-DWOLFSSL_SP_SMALL" - ENABLED_SP_RSA=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_2048" - ;; - rsa3072) - ENABLED_SP_RSA=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_2048" - ;; - - *) - AC_MSG_ERROR([Invalid choice of Single Precision length in bits [256, 2048, 3072]: $ENABLED_SP.]) - break;; - esac -done - -ENABLED_SP=no -if test "$ENABLED_RSA" = "yes" && test "$ENABLED_SP_RSA" = "yes"; then - ENABLED_SP=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HAVE_SP_RSA" -fi -if test "$ENABLED_DH" = "yes" && test "$ENABLED_SP_DH" = "yes"; then - ENABLED_SP=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HAVE_SP_DH" -fi -if test "$ENABLED_ECC" = "yes" && test "$ENABLED_SP_ECC" = "yes"; then - ENABLED_SP=yes - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_HAVE_SP_ECC" -fi - - -AC_ARG_ENABLE([sp-asm], - [AS_HELP_STRING([--enable-sp-asm],[Enable Single Precision assembly implementation (default: disabled)])], - [ ENABLED_SP_ASM=$enableval ], - [ ENABLED_SP_ASM=no ], - ) -if test "$ENABLED_SP_ASM" = "yes"; then - if test "$ENABLED_SP" = "no"; then - AC_MSG_ERROR([Must have SP enabled: --enable-sp]) - fi - if test "$ENABLED_ASM" = "no"; then - AC_MSG_ERROR([Assembly code turned off]) - fi - - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ASM" - case $host_cpu in - *aarch64*) - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM64_ASM" - ENABLED_SP_ARM64_ASM=yes - ;; - *arm*) - if test $host_alias = "thumb"; then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM -mthumb -march=armv6" - ENABLED_SP_ARM_THUMB_ASM=yes - else - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM" - ENABLED_SP_ARM32_ASM=yes - fi - ;; - *x86_64*) - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_X86_64_ASM" - ENABLED_SP_X86_64_ASM=yes - ;; - *) - AC_MSG_ERROR([ASM not available for CPU. 
Supported CPUs: x86_64, aarch64, arm]) - ;; - esac -fi - -AC_ARG_ENABLE([sp-math], - [AS_HELP_STRING([--enable-sp-math],[Enable Single Precision math implementation only (default: disabled)])], - [ ENABLED_SP_MATH=$enableval ], - [ ENABLED_SP_MATH=no ], - ) -if test "$ENABLED_SP_MATH" = "yes"; then - if test "$ENABLED_SP" = "no"; then - AC_MSG_ERROR([Must have SP enabled: --enable-sp]) - fi - if test "$ENABLED_ECCCUSTCURVES" = "yes"; then - AC_MSG_ERROR([Cannot use single precision math and custom curves]) - fi - if test "$ENABLED_OPENSSLEXTRA" = "yes"; then - AC_MSG_ERROR([Cannot use single precision math and OpenSSL extra]) - fi - if test "$ENABLED_DSA" = "yes"; then - AC_MSG_ERROR([Cannot use single precision math and DSA]) - fi - if test "$ENABLED_SRP" = "yes"; then - AC_MSG_ERROR([Cannot use single precision math and SRP]) - fi - if test "$ENABLED_SP_RSA" = "no" && test "$ENABLED_RSA" = "yes"; then - AC_MSG_ERROR([Cannot use P256 single precision only math and RSA]) - fi - if test "$ENABLED_SP_DH" = "no" && test "$ENABLED_DH" = "yes"; then - AC_MSG_ERROR([Cannot use P256 single precision only math and DH]) - fi -fi -if test "$ENABLED_SP_MATH" = "yes"; then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_MATH" -fi - -AM_CONDITIONAL([BUILD_SP], [test "x$ENABLED_SP" = "xyes"]) -AM_CONDITIONAL([BUILD_SP_C], [test "x$ENABLED_SP" = "xyes" && test "x$ENABLED_SP_ASM" = "xno" ]) -AM_CONDITIONAL([BUILD_SP_ARM64], [test "x$ENABLED_SP_ARM64_ASM" = "xyes" ]) -AM_CONDITIONAL([BUILD_SP_ARM32], [test "x$ENABLED_SP_ARM32_ASM" = "xyes" ]) -AM_CONDITIONAL([BUILD_SP_ARM_THUMB], [test "x$ENABLED_SP_ARM_THUMB_ASM" = "xyes" ]) -AM_CONDITIONAL([BUILD_SP_X86_64], [test "x$ENABLED_SP_X86_64_ASM" = "xyes" ]) -AM_CONDITIONAL([BUILD_SP_INT], [test "x$ENABLED_SP_MATH" = "xyes" ]) - # Fast RSA using Intel IPP ippdir="${srcdir}/IPP" ipplib="lib" # if autoconf guesses 32bit system changes lib directory diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index 
630ea9ef9..6fd063d4a 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -458,7 +458,8 @@ static const char* bench_result_words1[][4] = { #endif }; -#if !defined(NO_RSA) ||defined(WOLFSSL_KEY_GEN) || defined(HAVE_NTRU) || \ +#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) ||\ + defined(WOLFSSL_KEY_GEN) || defined(HAVE_NTRU) || \ defined(HAVE_ECC) || !defined(NO_DH) || defined(HAVE_ECC_ENCRYPT) || \ defined(HAVE_CURVE25519) || defined(HAVE_CURVE25519_SHARED_SECRET) || \ defined(HAVE_ED25519) @@ -568,7 +569,7 @@ static const char* bench_desc_words[][9] = { #endif #endif -#if !defined(NO_RSA) || !defined(NO_DH) \ +#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) \ || defined(WOLFSSL_KEYGEN) || defined(HAVE_ECC) \ || defined(HAVE_CURVE25519) || defined(HAVE_ED25519) #define HAVE_LOCAL_RNG @@ -577,7 +578,8 @@ static const char* bench_desc_words[][9] = { #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519) || defined(HAVE_ECC) || \ defined(HAVE_ECC) || defined(HAVE_NTRU) || !defined(NO_DH) || \ - !defined(NO_RSA) || defined(HAVE_SCRYPT) + (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \ + defined(HAVE_SCRYPT) #define BENCH_ASYM #endif @@ -776,7 +778,9 @@ static int rsa_sign_verify = 0; /* Don't print out in CSV format by default */ static int csv_format = 0; +#ifdef BENCH_ASYM static int csv_header_count = 0; +#endif /* for compatibility */ #define BENCH_SIZE bench_size @@ -3861,28 +3865,43 @@ void bench_rsaKeyGen_size(int doAsync, int keySz) static void bench_rsa_helper(int doAsync, RsaKey rsaKey[BENCH_MAX_PENDING], int rsaKeySz) { +#ifndef WOLFSSL_RSA_VERIFY_ONLY int ret = 0, i, times, count = 0, pending = 0; +#ifndef WOLFSSL_RSA_PUBLIC_ONLY word32 idx = 0; +#endif const char* messageStr = "Everyone gets Friday off."; const int len = (int)XSTRLEN((char*)messageStr); double start = 0.0f; const char**desc = bench_desc_words[lng_index]; DECLARE_VAR_INIT(message, byte, len, messageStr, 
HEAP_HINT); +#else + (void)doAsync; + (void)rsaKey; + (void)rsaKeySz; +#endif +#ifndef WOLFSSL_RSA_VERIFY_ONLY #ifdef USE_CERT_BUFFERS_1024 DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 128, HEAP_HINT); DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 128, HEAP_HINT); #elif defined(USE_CERT_BUFFERS_2048) DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 256, HEAP_HINT); - DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 256, HEAP_HINT); + #ifndef WOLFSSL_RSA_PUBLIC_ONLY + DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 256, HEAP_HINT); + #endif #elif defined(USE_CERT_BUFFERS_3072) - DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 384, HEAP_HINT); - DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 384, HEAP_HINT); + DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 384, HEAP_HINT); + #ifndef WOLFSSL_RSA_PUBLIC_ONLY + DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 384, HEAP_HINT); + #endif #else #error "need a cert buffer size" #endif /* USE_CERT_BUFFERS */ +#endif if (!rsa_sign_verify) { +#ifndef WOLFSSL_RSA_VERIFY_ONLY /* begin public RSA */ bench_stats_start(&count, &start); do { @@ -3908,7 +3927,9 @@ static void bench_rsa_helper(int doAsync, RsaKey rsaKey[BENCH_MAX_PENDING], exit_rsa_pub: bench_stats_asym_finish("RSA", rsaKeySz, desc[0], doAsync, count, start, ret); +#endif +#ifndef WOLFSSL_RSA_PUBLIC_ONLY if (ret < 0) { goto exit; } @@ -3941,8 +3962,10 @@ exit_rsa_pub: exit: bench_stats_asym_finish("RSA", rsaKeySz, desc[1], doAsync, count, start, ret); +#endif } else { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* begin RSA sign */ bench_stats_start(&count, &start); do { @@ -4001,6 +4024,7 @@ exit_rsa_sign: exit_rsa_verify: bench_stats_asym_finish("RSA", rsaKeySz, desc[5], doAsync, count, start, ret); +#endif } FREE_ARRAY(enc, BENCH_MAX_PENDING, HEAP_HINT); @@ -4051,6 +4075,7 @@ void bench_rsa(int doAsync) goto exit_bench_rsa; #endif +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* decode the private key */ idx = 0; if ((ret = wc_RsaPrivateKeyDecode(tmp, &idx, &rsaKey[i], @@ -4058,6 +4083,24 @@ void bench_rsa(int doAsync) 
printf("wc_RsaPrivateKeyDecode failed! %d\n", ret); goto exit_bench_rsa; } +#else + #ifdef USE_CERT_BUFFERS_2048 + ret = mp_read_unsigned_bin(&rsaKey[i].n, &tmp[13], 256); + if (ret != 0) { + printf("Setting modulus failed! %d\n", ret); + goto exit_bench_rsa; + } + ret = mp_set_int(&rsaKey[i].e, WC_RSA_EXPONENT); + if (ret != 0) { + printf("Setting public exponent failed! %d\n", ret); + goto exit_bench_rsa; + } + #else + #error Not supported yet! + #endif + (void)idx; + (void)bytes; +#endif } bench_rsa_helper(doAsync, rsaKey, rsaKeySz); @@ -5258,10 +5301,12 @@ int main(int argc, char** argv) else if (string_matches(argv[1], "-rsa_sign")) rsa_sign_verify = 1; #endif +#ifdef BENCH_ASYM else if (string_matches(argv[1], "-csv")) { csv_format = 1; csv_header_count = 1; } +#endif else if (argv[1][0] == '-') { optMatched = 0; #ifndef WOLFSSL_BENCHMARK_ALL diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index 5ce25bf7c..8aa1d8501 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -127,7 +127,7 @@ STATIC WC_INLINE void ByteReverseWords(word32* out, const word32* in, } -#ifdef WORD64_AVAILABLE +#if defined(WORD64_AVAILABLE) && !defined(WOLFSSL_NO_WORD64_OPS) STATIC WC_INLINE word64 rotlFixed64(word64 x, word64 y) @@ -169,9 +169,9 @@ STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in, } -#endif /* WORD64_AVAILABLE */ - +#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */ +#ifndef WOLFSSL_NO_XOR_OPS STATIC WC_INLINE void XorWords(wolfssl_word* r, const wolfssl_word* a, word32 n) { word32 i; @@ -193,8 +193,9 @@ STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count) for (i = 0; i < count; i++) b[i] ^= m[i]; } } +#endif - +#ifndef WOLFSSL_NO_FORCE_ZERO /* Make sure compiler doesn't skip */ STATIC WC_INLINE void ForceZero(const void* mem, word32 len) { @@ -217,8 +218,10 @@ STATIC WC_INLINE void ForceZero(const void* mem, word32 len) while (len--) *z++ = 0; } +#endif +#ifndef WOLFSSL_NO_CONST_CMP /* check all length 
bytes for equality, return 0 on success */ STATIC WC_INLINE int ConstantCompare(const byte* a, const byte* b, int length) { @@ -231,6 +234,7 @@ STATIC WC_INLINE int ConstantCompare(const byte* a, const byte* b, int length) return compareSum; } +#endif #ifndef WOLFSSL_HAVE_MIN @@ -255,6 +259,7 @@ STATIC WC_INLINE int ConstantCompare(const byte* a, const byte* b, int length) } #endif /* !WOLFSSL_HAVE_MAX */ +#ifndef WOLFSSL_NO_INT_ENCODE /* converts a 32 bit integer to 24 bit */ STATIC WC_INLINE void c32to24(word32 in, word24 out) { @@ -278,7 +283,9 @@ STATIC WC_INLINE void c32toa(word32 wc_u32, byte* c) c[2] = (wc_u32 >> 8) & 0xff; c[3] = wc_u32 & 0xff; } +#endif +#ifndef WOLFSSL_NO_INT_DECODE /* convert a 24 bit integer into a 32 bit one */ STATIC WC_INLINE void c24to32(const word24 wc_u24, word32* wc_u32) { @@ -309,8 +316,10 @@ STATIC WC_INLINE word32 btoi(byte b) { return (word32)(b - 0x30); } +#endif +#ifndef WOLFSSL_NO_CT_OPS /* Constant time - mask set when a > b. */ STATIC WC_INLINE byte ctMaskGT(int a, int b) { @@ -365,6 +374,7 @@ STATIC WC_INLINE byte ctSetLTE(int a, int b) { return ((word32)a - b - 1) >> 31; } +#endif #undef STATIC diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index d66ad10be..896d09e2c 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -96,6 +96,7 @@ int wc_FreeRsaKey(RsaKey* key) } +#ifndef WOLFSSL_RSA_VERIFY_ONLY int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, word32 outLen, RsaKey* key, WC_RNG* rng) { @@ -104,8 +105,10 @@ int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, } return RsaPublicEncrypt_fips(in, inLen, out, outLen, key, rng); } +#endif +#ifndef WOLFSSL_RSA_PUBLIC_ONLY int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out, RsaKey* key) { @@ -134,6 +137,7 @@ int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, } return RsaSSL_Sign_fips(in, inLen, out, outLen, key, rng); } +#endif int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key) @@ 
-164,6 +168,7 @@ int wc_RsaEncryptSize(RsaKey* key) } +#ifndef WOLFSSL_RSA_VERIFY_ONLY int wc_RsaFlattenPublicKey(RsaKey* key, byte* a, word32* aSz, byte* b, word32* bSz) { @@ -171,6 +176,7 @@ int wc_RsaFlattenPublicKey(RsaKey* key, byte* a, word32* aSz, byte* b, /* not specified as fips so not needing _fips */ return RsaFlattenPublicKey(key, a, aSz, b, bSz); } +#endif #ifdef WOLFSSL_KEY_GEN @@ -215,19 +221,25 @@ enum { static void wc_RsaCleanup(RsaKey* key) { +#ifndef WOLFSSL_RSA_VERIFY_INLINE if (key && key->data) { /* make sure any allocated memory is free'd */ if (key->dataIsAlloc) { + #ifndef WOLFSSL_RSA_PUBLIC_ONLY if (key->type == RSA_PRIVATE_DECRYPT || key->type == RSA_PRIVATE_ENCRYPT) { ForceZero(key->data, key->dataLen); } + #endif XFREE(key->data, key->heap, DYNAMIC_TYPE_WOLF_BIGINT); key->dataIsAlloc = 0; } key->data = NULL; key->dataLen = 0; } +#else + (void)key; +#endif } int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId) @@ -243,9 +255,11 @@ int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId) key->type = RSA_TYPE_UNKNOWN; key->state = RSA_STATE_NONE; key->heap = heap; - key->data = NULL; - key->dataLen = 0; +#ifndef WOLFSSL_RSA_VERIFY_INLINE key->dataIsAlloc = 0; + key->data = NULL; +#endif + key->dataLen = 0; #ifdef WC_RSA_BLINDING key->rng = NULL; #endif @@ -270,6 +284,7 @@ int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId) #endif /* WC_ASYNC_ENABLE_RSA */ #endif /* WOLFSSL_ASYNC_CRYPT */ +#ifndef WOLFSSL_RSA_PUBLIC_ONLY ret = mp_init_multi(&key->n, &key->e, NULL, NULL, NULL, NULL); if (ret != MP_OKAY) return ret; @@ -284,6 +299,16 @@ int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId) mp_clear(&key->e); return ret; } +#else + ret = mp_init(&key->n); + if (ret != MP_OKAY) + return ret; + ret = mp_init(&key->e); + if (ret != MP_OKAY) { + mp_clear(&key->n); + return ret; + } +#endif #ifdef WOLFSSL_XILINX_CRYPT key->pubExp = 0; @@ -411,6 +436,7 @@ int wc_FreeRsaKey(RsaKey* key) wolfAsync_DevCtxFree(&key->asyncDev, 
WOLFSSL_ASYNC_MARKER_RSA); #endif +#ifndef WOLFSSL_RSA_PUBLIC_ONLY if (key->type == RSA_PRIVATE) { #if defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM) mp_forcezero(&key->u); @@ -430,6 +456,7 @@ int wc_FreeRsaKey(RsaKey* key) mp_clear(&key->q); mp_clear(&key->p); mp_clear(&key->d); +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ /* public part */ mp_clear(&key->e); @@ -443,7 +470,7 @@ int wc_FreeRsaKey(RsaKey* key) return ret; } - +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* Check the pair-wise consistency of the RSA key. * From NIST SP 800-56B, section 6.4.1.1. * Verify that k = (k^e)^d, for some k: 1 < k < n-1. */ @@ -532,6 +559,7 @@ int wc_CheckRsaKey(RsaKey* key) return ret; } +#endif #if !defined(WC_NO_RSA_OAEP) || defined(WC_RSA_PSS) @@ -944,6 +972,7 @@ static int RsaPad(const byte* input, word32 inputLen, byte* pkcsBlock, XMEMSET(&pkcsBlock[1], 0xFF, pkcsBlockLen - inputLen - 2); } else { +#ifndef WOLFSSL_RSA_VERIFY_ONLY /* pad with non-zero random bytes */ word32 padLen, i; int ret; @@ -963,6 +992,9 @@ static int RsaPad(const byte* input, word32 inputLen, byte* pkcsBlock, for (i = 1; i < padLen; i++) { if (pkcsBlock[i] == 0) pkcsBlock[i] = 0x01; } +#else + return RSA_WRONG_TYPE_E; +#endif } pkcsBlock[pkcsBlockLen-inputLen-1] = 0; /* separator */ @@ -972,6 +1004,7 @@ static int RsaPad(const byte* input, word32 inputLen, byte* pkcsBlock, } #endif /* !WC_NO_RNG */ +#ifndef WOLFSSL_RSA_VERIFY_ONLY /* helper function to direct which padding is used */ static int wc_RsaPad_ex(const byte* input, word32 inputLen, byte* pkcsBlock, word32 pkcsBlockLen, byte padValue, WC_RNG* rng, int padType, @@ -1133,6 +1166,7 @@ static int RsaUnPad_OAEP(byte *pkcsBlock, unsigned int pkcsBlockLen, return pkcsBlockLen - idx; } #endif /* WC_NO_RSA_OAEP */ +#endif #ifdef WC_RSA_PSS /* 0x00 .. 
0x00 0x01 | Salt | Gen Hash | 0xbc @@ -1491,6 +1525,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, #ifndef WOLFSSL_SP_NO_2048 if (mp_count_bits(&key->n) == 2048) { switch(type) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY case RSA_PRIVATE_DECRYPT: case RSA_PRIVATE_ENCRYPT: #ifdef WC_RSA_BLINDING @@ -1505,6 +1540,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, return sp_RsaPrivate_2048(in, inLen, &key->d, &key->p, &key->q, NULL, NULL, NULL, &key->n, out, outLen); #endif +#endif case RSA_PUBLIC_ENCRYPT: case RSA_PUBLIC_DECRYPT: return sp_RsaPublic_2048(in, inLen, &key->e, &key->n, out, outLen); @@ -1514,6 +1550,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, #ifndef WOLFSSL_SP_NO_3072 if (mp_count_bits(&key->n) == 3072) { switch(type) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY case RSA_PRIVATE_DECRYPT: case RSA_PRIVATE_ENCRYPT: #ifdef WC_RSA_BLINDING @@ -1528,6 +1565,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, return sp_RsaPrivate_3072(in, inLen, &key->d, &key->p, &key->q, NULL, NULL, NULL, &key->n, out, outLen); #endif +#endif case RSA_PUBLIC_ENCRYPT: case RSA_PUBLIC_DECRYPT: return sp_RsaPublic_3072(in, inLen, &key->e, &key->n, out, outLen); @@ -1537,6 +1575,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, #endif /* WOLFSSL_HAVE_SP_RSA */ #ifdef WOLFSSL_SP_MATH + (void)rng; return WC_KEY_SIZE_E; #else (void)rng; @@ -1575,6 +1614,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, if (ret == 0) { switch(type) { + #ifndef WOLFSSL_RSA_PUBLIC_ONLY case RSA_PRIVATE_DECRYPT: case RSA_PRIVATE_ENCRYPT: { @@ -1678,6 +1718,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out, break; } + #endif case RSA_PUBLIC_ENCRYPT: case RSA_PUBLIC_DECRYPT: #ifdef WOLFSSL_XILINX_CRYPT @@ -1751,6 +1792,7 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out, #endif /* WOLFSSL_ASYNC_CRYPT_TEST */ 
switch(type) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY case RSA_PRIVATE_DECRYPT: case RSA_PRIVATE_ENCRYPT: #ifdef HAVE_CAVIUM @@ -1775,6 +1817,7 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out, ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng); #endif break; +#endif case RSA_PUBLIC_ENCRYPT: case RSA_PUBLIC_DECRYPT: @@ -2008,6 +2051,7 @@ int wc_RsaFunction(const byte* in, word32 inLen, byte* out, } +#ifndef WOLFSSL_RSA_VERIFY_ONLY /* Internal Wrappers */ /* Gives the option of choosing padding type in : input to be encrypted @@ -2129,6 +2173,7 @@ static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out, return ret; } +#endif /* Gives the option of choosing padding type in : input to be decrypted @@ -2170,12 +2215,14 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out, /* Async operations that include padding */ if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA && pad_type != WC_RSA_PSS_PAD) { +#ifndef WOLFSSL_RSA_PUBLIC_ONLY if (rsa_type == RSA_PRIVATE_DECRYPT && pad_value == RSA_BLOCK_TYPE_2) { key->state = RSA_STATE_DECRYPT_RES; key->data = NULL; return NitroxRsaPrivateDecrypt(in, inLen, out, &key->dataLen, key); +#endif } else if (rsa_type == RSA_PUBLIC_DECRYPT && pad_value == RSA_BLOCK_TYPE_1) { @@ -2186,6 +2233,7 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out, } #endif +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE) /* verify the tmp ptr is NULL, otherwise indicates bad state */ if (key->data != NULL) { ret = BAD_STATE_E; @@ -2206,13 +2254,19 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out, else { key->data = out; } +#endif key->state = RSA_STATE_DECRYPT_EXPTMOD; FALL_THROUGH; case RSA_STATE_DECRYPT_EXPTMOD: +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE) ret = wc_RsaFunction(key->data, inLen, key->data, &key->dataLen, rsa_type, key, rng); +#else + ret = wc_RsaFunction(out, inLen, out, 
&key->dataLen, rsa_type, key, + rng); +#endif if (ret >= 0 || ret == WC_PENDING_E) { key->state = RSA_STATE_DECRYPT_UNPAD; @@ -2226,16 +2280,25 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out, case RSA_STATE_DECRYPT_UNPAD: { byte* pad = NULL; +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_VERIFY_INLINE) ret = wc_RsaUnPad_ex(key->data, key->dataLen, &pad, pad_value, pad_type, hash, mgf, label, labelSz, saltLen, mp_count_bits(&key->n), key->heap); +#else + ret = wc_RsaUnPad_ex(out, key->dataLen, &pad, pad_value, pad_type, hash, + mgf, label, labelSz, saltLen, + mp_count_bits(&key->n), key->heap); +#endif if (rsa_type == RSA_PUBLIC_DECRYPT && ret > (int)outLen) ret = RSA_BUFFER_E; else if (ret >= 0 && pad != NULL) { +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) signed char c; +#endif /* only copy output if not inline */ if (outPtr == NULL) { +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) word32 i, j; int start = (int)((size_t)pad - (size_t)key->data); @@ -2246,12 +2309,20 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out, /* 0 - no add, -1 add */ i += -c; } +#else + XMEMCPY(out, pad, ret); +#endif } else *outPtr = pad; +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) ret = ctMaskSelInt(ctMaskLTE(ret, outLen), ret, RSA_BUFFER_E); ret = ctMaskSelInt(ctMaskNotEq(ret, 0), ret, RSA_BUFFER_E); +#else + if (outLen < (word32)ret) + ret = RSA_BUFFER_E; +#endif } key->state = RSA_STATE_DECRYPT_RES; @@ -2296,6 +2367,7 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out, } +#ifndef WOLFSSL_RSA_VERIFY_ONLY /* Public RSA Functions */ int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, word32 outLen, RsaKey* key, WC_RNG* rng) @@ -2316,8 +2388,10 @@ int wc_RsaPublicEncrypt_ex(const byte* in, word32 inLen, byte* out, RSA_BLOCK_TYPE_2, type, hash, mgf, label, labelSz, 0, rng); } #endif /* WC_NO_RSA_OAEP */ +#endif +#ifndef WOLFSSL_RSA_PUBLIC_ONLY int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out, RsaKey* key) { 
WC_RNG* rng = NULL; @@ -2373,6 +2447,7 @@ int wc_RsaPrivateDecrypt_ex(const byte* in, word32 inLen, byte* out, labelSz, 0, rng); } #endif /* WC_NO_RSA_OAEP || WC_RSA_NO_PADDING */ +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key) @@ -2386,6 +2461,7 @@ int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key) WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); } +#ifndef WOLFSSL_RSA_VERIFY_ONLY int wc_RsaSSL_Verify(const byte* in, word32 inLen, byte* out, word32 outLen, RsaKey* key) { @@ -2403,6 +2479,7 @@ int wc_RsaSSL_Verify(const byte* in, word32 inLen, byte* out, word32 outLen, RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); } +#endif #ifdef WC_RSA_PSS /* Verify the message signed with RSA-PSS. @@ -2667,6 +2744,7 @@ int wc_RsaPSS_VerifyCheck(byte* in, word32 inLen, byte* out, word32 outLen, #endif +#ifndef WOLFSSL_RSA_PUBLIC_ONLY int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, word32 outLen, RsaKey* key, WC_RNG* rng) { @@ -2720,7 +2798,9 @@ int wc_RsaPSS_Sign_ex(const byte* in, word32 inLen, byte* out, word32 outLen, hash, mgf, NULL, 0, saltLen, rng); } #endif +#endif +#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(WOLFSSL_SP_MATH) int wc_RsaEncryptSize(RsaKey* key) { int ret; @@ -2739,8 +2819,9 @@ int wc_RsaEncryptSize(RsaKey* key) return ret; } +#endif - +#ifndef WOLFSSL_RSA_VERIFY_ONLY /* flatten RsaKey structure into individual elements (e, n) */ int wc_RsaFlattenPublicKey(RsaKey* key, byte* e, word32* eSz, byte* n, word32* nSz) @@ -2769,11 +2850,12 @@ int wc_RsaFlattenPublicKey(RsaKey* key, byte* e, word32* eSz, byte* n, return 0; } - +#endif #endif /* HAVE_FIPS */ +#ifndef WOLFSSL_RSA_VERIFY_ONLY static int RsaGetValue(mp_int* in, byte* out, word32* outSz) { word32 sz; @@ -2818,6 +2900,7 @@ int wc_RsaExportKey(RsaKey* key, return ret; } +#endif #ifdef WOLFSSL_KEY_GEN diff --git 
a/wolfcrypt/src/signature.c b/wolfcrypt/src/signature.c index 7c9013b1a..aecaa679b 100644 --- a/wolfcrypt/src/signature.c +++ b/wolfcrypt/src/signature.c @@ -356,7 +356,7 @@ int wc_SignatureGenerateHash( case WC_SIGNATURE_TYPE_RSA_W_ENC: case WC_SIGNATURE_TYPE_RSA: -#ifndef NO_RSA +#if !defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* Create signature using provided RSA key */ do { #ifdef WOLFSSL_ASYNC_CRYPT @@ -420,7 +420,7 @@ int wc_SignatureGenerate( } hash_enc_len = hash_len = ret; -#ifndef NO_RSA +#if !defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) { /* For RSA with ASN.1 encoding include room */ hash_enc_len += MAX_DER_DIGEST_ASN_SZ; @@ -440,7 +440,8 @@ int wc_SignatureGenerate( if (ret == 0) { /* Handle RSA with DER encoding */ if (sig_type == WC_SIGNATURE_TYPE_RSA_W_ENC) { - #if defined(NO_RSA) || defined(NO_ASN) + #if defined(NO_RSA) || defined(NO_ASN) || \ + defined(WOLFSSL_RSA_PUBLIC_ONLY) ret = SIG_TYPE_E; #else ret = wc_SignatureDerEncode(hash_type, hash_data, hash_len, diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index aae5149c4..7cbbf094a 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -2538,7 +2538,8 @@ static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* AND m into each word of a and store in r. * @@ -2758,7 +2759,7 @@ static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. 
* @@ -2779,7 +2780,563 @@ static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho) *rho = -x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #256\n\t" + "blt 1b\n\t" + "str r3, [%[r], #256]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], 
#16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[12] * B\n\t" + "ldr r8, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[13] * B\n\t" + "ldr r8, [%[a], #52]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[14] * B\n\t" + "ldr r8, [%[a], #56]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[15] * 
B\n\t" + "ldr r8, [%[a], #60]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[16] * B\n\t" + "ldr r8, [%[a], #64]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[17] * B\n\t" + "ldr r8, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[18] * B\n\t" + "ldr r8, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[19] * B\n\t" + "ldr r8, [%[a], #76]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[20] * B\n\t" + "ldr r8, [%[a], #80]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[21] * B\n\t" + "ldr r8, [%[a], #84]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[22] * B\n\t" + "ldr r8, [%[a], #88]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #88]\n\t" + "# A[23] * B\n\t" + "ldr r8, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #92]\n\t" + "# A[24] * B\n\t" + "ldr r8, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #96]\n\t" + "# A[25] * B\n\t" + "ldr r8, 
[%[a], #100]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #100]\n\t" + "# A[26] * B\n\t" + "ldr r8, [%[a], #104]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #104]\n\t" + "# A[27] * B\n\t" + "ldr r8, [%[a], #108]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #108]\n\t" + "# A[28] * B\n\t" + "ldr r8, [%[a], #112]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #112]\n\t" + "# A[29] * B\n\t" + "ldr r8, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #116]\n\t" + "# A[30] * B\n\t" + "ldr r8, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #120]\n\t" + "# A[31] * B\n\t" + "ldr r8, [%[a], #124]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #124]\n\t" + "# A[32] * B\n\t" + "ldr r8, [%[a], #128]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #128]\n\t" + "# A[33] * B\n\t" + "ldr r8, [%[a], #132]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #132]\n\t" + "# A[34] * B\n\t" + "ldr r8, [%[a], #136]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #136]\n\t" + "# A[35] * B\n\t" + "ldr r8, 
[%[a], #140]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #140]\n\t" + "# A[36] * B\n\t" + "ldr r8, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #144]\n\t" + "# A[37] * B\n\t" + "ldr r8, [%[a], #148]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #148]\n\t" + "# A[38] * B\n\t" + "ldr r8, [%[a], #152]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #152]\n\t" + "# A[39] * B\n\t" + "ldr r8, [%[a], #156]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #156]\n\t" + "# A[40] * B\n\t" + "ldr r8, [%[a], #160]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #160]\n\t" + "# A[41] * B\n\t" + "ldr r8, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #164]\n\t" + "# A[42] * B\n\t" + "ldr r8, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #168]\n\t" + "# A[43] * B\n\t" + "ldr r8, [%[a], #172]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #172]\n\t" + "# A[44] * B\n\t" + "ldr r8, [%[a], #176]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #176]\n\t" + "# A[45] * B\n\t" + "ldr r8, 
[%[a], #180]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #180]\n\t" + "# A[46] * B\n\t" + "ldr r8, [%[a], #184]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #184]\n\t" + "# A[47] * B\n\t" + "ldr r8, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #188]\n\t" + "# A[48] * B\n\t" + "ldr r8, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #192]\n\t" + "# A[49] * B\n\t" + "ldr r8, [%[a], #196]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #196]\n\t" + "# A[50] * B\n\t" + "ldr r8, [%[a], #200]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #200]\n\t" + "# A[51] * B\n\t" + "ldr r8, [%[a], #204]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #204]\n\t" + "# A[52] * B\n\t" + "ldr r8, [%[a], #208]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #208]\n\t" + "# A[53] * B\n\t" + "ldr r8, [%[a], #212]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #212]\n\t" + "# A[54] * B\n\t" + "ldr r8, [%[a], #216]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #216]\n\t" + "# A[55] * B\n\t" + "ldr r8, 
[%[a], #220]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #220]\n\t" + "# A[56] * B\n\t" + "ldr r8, [%[a], #224]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #224]\n\t" + "# A[57] * B\n\t" + "ldr r8, [%[a], #228]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #228]\n\t" + "# A[58] * B\n\t" + "ldr r8, [%[a], #232]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #232]\n\t" + "# A[59] * B\n\t" + "ldr r8, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #236]\n\t" + "# A[60] * B\n\t" + "ldr r8, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #240]\n\t" + "# A[61] * B\n\t" + "ldr r8, [%[a], #244]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #244]\n\t" + "# A[62] * B\n\t" + "ldr r8, [%[a], #248]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #248]\n\t" + "# A[63] * B\n\t" + "ldr r8, [%[a], #252]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "str r3, [%[r], #252]\n\t" + "str r4, [%[r], #256]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#endif +} + +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* r = 2^n 
mod m where n is the number of bits to reduce by. * Given m must be 2048 bits, just need to subtract. * @@ -4335,7 +4892,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e, } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ #ifdef WOLFSSL_HAVE_SP_DH /* r = 2^n mod m where n is the number of bits to reduce by. @@ -5359,561 +5916,6 @@ static void sp_2048_mont_sqr_64(sp_digit* r, sp_digit* a, sp_digit* m, sp_2048_mont_reduce_64(r, m, mp); } -/* Mul a by digit b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision digit. - */ -static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ -#ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r5, r3, %[b], r8\n\t" - "mov r4, #0\n\t" - "str r5, [%[r]]\n\t" - "mov r5, #0\n\t" - "mov r9, #4\n\t" - "1:\n\t" - "ldr r8, [%[a], r9]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], r9]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r9, r9, #4\n\t" - "cmp r9, #256\n\t" - "blt 1b\n\t" - "str r3, [%[r], #256]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, 
r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], 
#48]\n\t" - "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #56]\n\t" - "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #60]\n\t" - "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# 
A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #120]\n\t" - "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #124]\n\t" - "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #128]\n\t" - "# 
A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #132]\n\t" - "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #136]\n\t" - "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #140]\n\t" - "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #144]\n\t" - "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #148]\n\t" - "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #152]\n\t" - "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #156]\n\t" - "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #160]\n\t" - "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #164]\n\t" - "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #168]\n\t" - "# 
A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #172]\n\t" - "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #176]\n\t" - "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #180]\n\t" - "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #184]\n\t" - "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #188]\n\t" - "# A[48] * B\n\t" - "ldr r8, [%[a], #192]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #192]\n\t" - "# A[49] * B\n\t" - "ldr r8, [%[a], #196]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #196]\n\t" - "# A[50] * B\n\t" - "ldr r8, [%[a], #200]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #200]\n\t" - "# A[51] * B\n\t" - "ldr r8, [%[a], #204]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #204]\n\t" - "# A[52] * B\n\t" - "ldr r8, [%[a], #208]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #208]\n\t" - "# 
A[53] * B\n\t" - "ldr r8, [%[a], #212]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #212]\n\t" - "# A[54] * B\n\t" - "ldr r8, [%[a], #216]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #216]\n\t" - "# A[55] * B\n\t" - "ldr r8, [%[a], #220]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #220]\n\t" - "# A[56] * B\n\t" - "ldr r8, [%[a], #224]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #224]\n\t" - "# A[57] * B\n\t" - "ldr r8, [%[a], #228]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #228]\n\t" - "# A[58] * B\n\t" - "ldr r8, [%[a], #232]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #232]\n\t" - "# A[59] * B\n\t" - "ldr r8, [%[a], #236]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #236]\n\t" - "# A[60] * B\n\t" - "ldr r8, [%[a], #240]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #240]\n\t" - "# A[61] * B\n\t" - "ldr r8, [%[a], #244]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #244]\n\t" - "# A[62] * B\n\t" - "ldr r8, [%[a], #248]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #248]\n\t" - "# 
A[63] * B\n\t" - "ldr r8, [%[a], #252]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adc r4, r4, r7\n\t" - "str r3, [%[r], #252]\n\t" - "str r4, [%[r], #256]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif -} - /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -5972,35 +5974,6 @@ static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) return r; } -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. - */ -static void sp_2048_mask_64(sp_digit* r, sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<64; i++) - r[i] = a[i] & m; -#else - int i; - - for (i = 0; i < 64; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } -#endif -} - /* Compare a with b in constant time. * * a A single precision integer. @@ -6560,6 +6533,36 @@ static int32_t sp_2048_cmp_64(sp_digit* a, sp_digit* b) return r; } +#if !defined(NO_DH) || (defined(SP_RSA_PRIVATE_EXP_D) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* AND m into each word of a and store in r. + * + * r A single precision integer. + * a A single precision integer. + * m Mask to AND against each digit. 
+ */ +static void sp_2048_mask_64(sp_digit* r, sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<64; i++) + r[i] = a[i] & m; +#else + int i; + + for (i = 0; i < 64; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -6598,6 +6601,8 @@ static WC_INLINE int sp_2048_div_64(sp_digit* a, sp_digit* d, sp_digit* m, return MP_OKAY; } +#endif /* !NO_DH || (SP_RSA_PRIVATE_EXP_D && !WOLFSSL_RSA_PUBLIC_ONLY) */ +#if !defined(NO_DH) || (defined(SP_RSA_PRIVATE_EXP_D) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) /* Reduce a modulo m into r. (r = a mod m) * * r A single precision number that is the reduced result. @@ -6610,6 +6615,7 @@ static WC_INLINE int sp_2048_mod_64(sp_digit* r, sp_digit* a, sp_digit* m) return sp_2048_div_64(a, m, NULL, r); } +#endif /* !NO_DH || (SP_RSA_PRIVATE_EXP_D && !WOLFSSL_RSA_PUBLIC_ONLY) */ /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -7063,6 +7069,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -7187,6 +7194,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif #endif /* WOLFSSL_HAVE_SP_RSA */ #ifdef WOLFSSL_HAVE_SP_DH /* Convert an array of sp_digit to an mp_int. @@ -10300,7 +10308,8 @@ static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* AND m into each word of a and store in r. 
* @@ -10477,7 +10486,7 @@ static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -10498,7 +10507,819 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) *rho = -x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r5, r3, %[b], r8\n\t" + "mov r4, #0\n\t" + "str r5, [%[r]]\n\t" + "mov r5, #0\n\t" + "mov r9, #4\n\t" + "1:\n\t" + "ldr r8, [%[a], r9]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], r9]\n\t" + "mov r3, r4\n\t" + "mov r4, r5\n\t" + "mov r5, #0\n\t" + "add r9, r9, #4\n\t" + "cmp r9, #384\n\t" + "blt 1b\n\t" + "str r3, [%[r], #384]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#else + __asm__ __volatile__ ( + "mov r10, #0\n\t" + "# A[0] * B\n\t" + "ldr r8, [%[a]]\n\t" + "umull r3, r4, %[b], r8\n\t" + "mov r5, #0\n\t" + "str r3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr r8, [%[a], #4]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #4]\n\t" + "# A[2] * B\n\t" + "ldr r8, [%[a], #8]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #8]\n\t" + "# A[3] * B\n\t" + "ldr r8, [%[a], #12]\n\t" + "mov r5, #0\n\t" + "umull r6, 
r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #12]\n\t" + "# A[4] * B\n\t" + "ldr r8, [%[a], #16]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #16]\n\t" + "# A[5] * B\n\t" + "ldr r8, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #20]\n\t" + "# A[6] * B\n\t" + "ldr r8, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #24]\n\t" + "# A[7] * B\n\t" + "ldr r8, [%[a], #28]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #28]\n\t" + "# A[8] * B\n\t" + "ldr r8, [%[a], #32]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #32]\n\t" + "# A[9] * B\n\t" + "ldr r8, [%[a], #36]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #36]\n\t" + "# A[10] * B\n\t" + "ldr r8, [%[a], #40]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #40]\n\t" + "# A[11] * B\n\t" + "ldr r8, [%[a], #44]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #44]\n\t" + "# A[12] * B\n\t" + "ldr r8, [%[a], #48]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #48]\n\t" + "# A[13] * B\n\t" + "ldr r8, [%[a], #52]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds 
r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #52]\n\t" + "# A[14] * B\n\t" + "ldr r8, [%[a], #56]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #56]\n\t" + "# A[15] * B\n\t" + "ldr r8, [%[a], #60]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #60]\n\t" + "# A[16] * B\n\t" + "ldr r8, [%[a], #64]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #64]\n\t" + "# A[17] * B\n\t" + "ldr r8, [%[a], #68]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #68]\n\t" + "# A[18] * B\n\t" + "ldr r8, [%[a], #72]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #72]\n\t" + "# A[19] * B\n\t" + "ldr r8, [%[a], #76]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #76]\n\t" + "# A[20] * B\n\t" + "ldr r8, [%[a], #80]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #80]\n\t" + "# A[21] * B\n\t" + "ldr r8, [%[a], #84]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #84]\n\t" + "# A[22] * B\n\t" + "ldr r8, [%[a], #88]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #88]\n\t" + "# A[23] * B\n\t" + "ldr r8, [%[a], #92]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #92]\n\t" + "# A[24] * B\n\t" + "ldr r8, [%[a], #96]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #96]\n\t" + "# A[25] * B\n\t" + "ldr r8, [%[a], #100]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #100]\n\t" + "# A[26] * B\n\t" + "ldr r8, [%[a], #104]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #104]\n\t" + "# A[27] * B\n\t" + "ldr r8, [%[a], #108]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #108]\n\t" + "# A[28] * B\n\t" + "ldr r8, [%[a], #112]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #112]\n\t" + "# A[29] * B\n\t" + "ldr r8, [%[a], #116]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #116]\n\t" + "# A[30] * B\n\t" + "ldr r8, [%[a], #120]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #120]\n\t" + "# A[31] * B\n\t" + "ldr r8, [%[a], #124]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #124]\n\t" + "# A[32] * B\n\t" + "ldr r8, [%[a], #128]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #128]\n\t" + "# A[33] * B\n\t" + "ldr r8, [%[a], #132]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #132]\n\t" + "# A[34] * B\n\t" + "ldr r8, [%[a], #136]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #136]\n\t" + "# A[35] * B\n\t" + "ldr r8, [%[a], #140]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #140]\n\t" + "# A[36] * B\n\t" + "ldr r8, [%[a], #144]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #144]\n\t" + "# A[37] * B\n\t" + "ldr r8, [%[a], #148]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #148]\n\t" + "# A[38] * B\n\t" + "ldr r8, [%[a], #152]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #152]\n\t" + "# A[39] * B\n\t" + "ldr r8, [%[a], #156]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #156]\n\t" + "# A[40] * B\n\t" + "ldr r8, [%[a], #160]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #160]\n\t" + "# A[41] * B\n\t" + "ldr r8, [%[a], #164]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #164]\n\t" + "# A[42] * B\n\t" + "ldr r8, [%[a], #168]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #168]\n\t" + "# A[43] * B\n\t" + "ldr r8, [%[a], #172]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #172]\n\t" + "# A[44] * B\n\t" + "ldr r8, [%[a], #176]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #176]\n\t" + "# A[45] * B\n\t" + "ldr r8, [%[a], #180]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #180]\n\t" + "# A[46] * B\n\t" + "ldr r8, [%[a], #184]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #184]\n\t" + "# A[47] * B\n\t" + "ldr r8, [%[a], #188]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #188]\n\t" + "# A[48] * B\n\t" + "ldr r8, [%[a], #192]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #192]\n\t" + "# A[49] * B\n\t" + "ldr r8, [%[a], #196]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #196]\n\t" + "# A[50] * B\n\t" + "ldr r8, [%[a], #200]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #200]\n\t" + "# A[51] * B\n\t" + "ldr r8, [%[a], #204]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #204]\n\t" + "# A[52] * B\n\t" + "ldr r8, [%[a], #208]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #208]\n\t" + "# A[53] * B\n\t" + "ldr r8, [%[a], #212]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #212]\n\t" + "# A[54] * B\n\t" + "ldr r8, [%[a], #216]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #216]\n\t" + "# A[55] * B\n\t" + "ldr r8, [%[a], #220]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #220]\n\t" + "# A[56] * B\n\t" + "ldr r8, [%[a], #224]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #224]\n\t" + "# A[57] * B\n\t" + "ldr r8, [%[a], #228]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #228]\n\t" + "# A[58] * B\n\t" + "ldr r8, [%[a], #232]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #232]\n\t" + "# A[59] * B\n\t" + "ldr r8, [%[a], #236]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #236]\n\t" + "# A[60] * B\n\t" + "ldr r8, [%[a], #240]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #240]\n\t" + "# A[61] * B\n\t" + "ldr r8, [%[a], #244]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #244]\n\t" + "# A[62] * B\n\t" + "ldr r8, [%[a], #248]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #248]\n\t" + "# A[63] * B\n\t" + "ldr r8, [%[a], #252]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #252]\n\t" + "# A[64] * B\n\t" + "ldr r8, [%[a], #256]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #256]\n\t" + "# A[65] * B\n\t" + "ldr r8, [%[a], #260]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #260]\n\t" + "# A[66] * B\n\t" + "ldr r8, [%[a], #264]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #264]\n\t" + "# A[67] * B\n\t" + "ldr r8, [%[a], #268]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #268]\n\t" + "# A[68] * B\n\t" + "ldr r8, [%[a], #272]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #272]\n\t" + "# A[69] * B\n\t" + "ldr r8, [%[a], #276]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #276]\n\t" + "# A[70] * B\n\t" + "ldr r8, [%[a], #280]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #280]\n\t" + "# A[71] * B\n\t" + "ldr r8, [%[a], #284]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #284]\n\t" + "# A[72] * B\n\t" + "ldr r8, [%[a], #288]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #288]\n\t" + "# A[73] * B\n\t" + "ldr r8, [%[a], #292]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #292]\n\t" + "# A[74] * B\n\t" + "ldr r8, [%[a], #296]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #296]\n\t" + "# A[75] * B\n\t" + "ldr r8, [%[a], #300]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #300]\n\t" + "# A[76] * B\n\t" + "ldr r8, [%[a], #304]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #304]\n\t" + "# A[77] * B\n\t" + "ldr r8, [%[a], #308]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #308]\n\t" + "# A[78] * B\n\t" + "ldr r8, [%[a], #312]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #312]\n\t" + "# A[79] * B\n\t" + "ldr r8, [%[a], #316]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #316]\n\t" + "# A[80] * B\n\t" + "ldr r8, [%[a], #320]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #320]\n\t" + "# A[81] * B\n\t" + "ldr r8, [%[a], #324]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #324]\n\t" + "# A[82] * B\n\t" + "ldr r8, [%[a], #328]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #328]\n\t" + "# A[83] * B\n\t" + "ldr r8, [%[a], #332]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #332]\n\t" + "# A[84] * B\n\t" + "ldr r8, [%[a], #336]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #336]\n\t" + "# A[85] * B\n\t" + "ldr r8, [%[a], #340]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #340]\n\t" + "# A[86] * B\n\t" + "ldr r8, [%[a], #344]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #344]\n\t" + "# A[87] * B\n\t" + "ldr r8, [%[a], #348]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #348]\n\t" + "# A[88] * B\n\t" + "ldr r8, [%[a], #352]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #352]\n\t" + "# A[89] * B\n\t" + "ldr r8, [%[a], #356]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #356]\n\t" + "# A[90] * B\n\t" + "ldr r8, [%[a], #360]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #360]\n\t" + "# A[91] * B\n\t" + "ldr r8, [%[a], #364]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #364]\n\t" + "# A[92] * B\n\t" + "ldr r8, [%[a], #368]\n\t" + "mov r4, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, r10\n\t" + "str r5, [%[r], #368]\n\t" + "# A[93] * B\n\t" + "ldr r8, [%[a], #372]\n\t" + "mov r5, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, r10\n\t" + "str r3, [%[r], #372]\n\t" + "# A[94] * B\n\t" + "ldr r8, [%[a], #376]\n\t" + "mov r3, #0\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, r10\n\t" + "str r4, [%[r], #376]\n\t" + "# A[95] * B\n\t" + "ldr r8, [%[a], #380]\n\t" + "umull r6, r7, %[b], r8\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "str r5, [%[r], #380]\n\t" + "str r3, [%[r], #384]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + ); +#endif +} + +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* Sub b from a into a. (a -= b) * @@ -12789,7 +13610,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e, } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ #ifdef WOLFSSL_HAVE_SP_DH /* r = 2^n mod m where n is the number of bits to reduce by. @@ -14261,817 +15082,6 @@ static void sp_3072_mont_sqr_96(sp_digit* r, sp_digit* a, sp_digit* m, sp_3072_mont_reduce_96(r, m, mp); } -/* Mul a by digit b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision digit. 
- */ -static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ -#ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r5, r3, %[b], r8\n\t" - "mov r4, #0\n\t" - "str r5, [%[r]]\n\t" - "mov r5, #0\n\t" - "mov r9, #4\n\t" - "1:\n\t" - "ldr r8, [%[a], r9]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], r9]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r9, r9, #4\n\t" - "cmp r9, #384\n\t" - "blt 1b\n\t" - "str r3, [%[r], #384]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#else - __asm__ __volatile__ ( - "mov r10, #0\n\t" - "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" - "umull r3, r4, %[b], r8\n\t" - "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #4]\n\t" - "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #8]\n\t" - "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #12]\n\t" - "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #16]\n\t" - "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #20]\n\t" - "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, 
r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #24]\n\t" - "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #28]\n\t" - "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #32]\n\t" - "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #36]\n\t" - "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #40]\n\t" - "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #44]\n\t" - "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #48]\n\t" - "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #52]\n\t" - "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #56]\n\t" - "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #60]\n\t" - "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, 
r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #64]\n\t" - "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #68]\n\t" - "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #72]\n\t" - "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #76]\n\t" - "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #80]\n\t" - "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #84]\n\t" - "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #88]\n\t" - "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #92]\n\t" - "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #96]\n\t" - "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #100]\n\t" - "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - 
"adc r4, r4, r10\n\t" - "str r5, [%[r], #104]\n\t" - "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #108]\n\t" - "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #112]\n\t" - "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #116]\n\t" - "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #120]\n\t" - "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #124]\n\t" - "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #128]\n\t" - "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #132]\n\t" - "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #136]\n\t" - "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #140]\n\t" - "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - 
"adc r5, r5, r10\n\t" - "str r3, [%[r], #144]\n\t" - "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #148]\n\t" - "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #152]\n\t" - "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #156]\n\t" - "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #160]\n\t" - "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #164]\n\t" - "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #168]\n\t" - "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #172]\n\t" - "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #176]\n\t" - "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #180]\n\t" - "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - 
"adc r3, r3, r10\n\t" - "str r4, [%[r], #184]\n\t" - "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #188]\n\t" - "# A[48] * B\n\t" - "ldr r8, [%[a], #192]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #192]\n\t" - "# A[49] * B\n\t" - "ldr r8, [%[a], #196]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #196]\n\t" - "# A[50] * B\n\t" - "ldr r8, [%[a], #200]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #200]\n\t" - "# A[51] * B\n\t" - "ldr r8, [%[a], #204]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #204]\n\t" - "# A[52] * B\n\t" - "ldr r8, [%[a], #208]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #208]\n\t" - "# A[53] * B\n\t" - "ldr r8, [%[a], #212]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #212]\n\t" - "# A[54] * B\n\t" - "ldr r8, [%[a], #216]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #216]\n\t" - "# A[55] * B\n\t" - "ldr r8, [%[a], #220]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #220]\n\t" - "# A[56] * B\n\t" - "ldr r8, [%[a], #224]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - 
"adc r4, r4, r10\n\t" - "str r5, [%[r], #224]\n\t" - "# A[57] * B\n\t" - "ldr r8, [%[a], #228]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #228]\n\t" - "# A[58] * B\n\t" - "ldr r8, [%[a], #232]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #232]\n\t" - "# A[59] * B\n\t" - "ldr r8, [%[a], #236]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #236]\n\t" - "# A[60] * B\n\t" - "ldr r8, [%[a], #240]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #240]\n\t" - "# A[61] * B\n\t" - "ldr r8, [%[a], #244]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #244]\n\t" - "# A[62] * B\n\t" - "ldr r8, [%[a], #248]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #248]\n\t" - "# A[63] * B\n\t" - "ldr r8, [%[a], #252]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #252]\n\t" - "# A[64] * B\n\t" - "ldr r8, [%[a], #256]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #256]\n\t" - "# A[65] * B\n\t" - "ldr r8, [%[a], #260]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #260]\n\t" - "# A[66] * B\n\t" - "ldr r8, [%[a], #264]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - 
"adc r5, r5, r10\n\t" - "str r3, [%[r], #264]\n\t" - "# A[67] * B\n\t" - "ldr r8, [%[a], #268]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #268]\n\t" - "# A[68] * B\n\t" - "ldr r8, [%[a], #272]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #272]\n\t" - "# A[69] * B\n\t" - "ldr r8, [%[a], #276]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #276]\n\t" - "# A[70] * B\n\t" - "ldr r8, [%[a], #280]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #280]\n\t" - "# A[71] * B\n\t" - "ldr r8, [%[a], #284]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #284]\n\t" - "# A[72] * B\n\t" - "ldr r8, [%[a], #288]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #288]\n\t" - "# A[73] * B\n\t" - "ldr r8, [%[a], #292]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #292]\n\t" - "# A[74] * B\n\t" - "ldr r8, [%[a], #296]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #296]\n\t" - "# A[75] * B\n\t" - "ldr r8, [%[a], #300]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #300]\n\t" - "# A[76] * B\n\t" - "ldr r8, [%[a], #304]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - 
"adc r3, r3, r10\n\t" - "str r4, [%[r], #304]\n\t" - "# A[77] * B\n\t" - "ldr r8, [%[a], #308]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #308]\n\t" - "# A[78] * B\n\t" - "ldr r8, [%[a], #312]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #312]\n\t" - "# A[79] * B\n\t" - "ldr r8, [%[a], #316]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #316]\n\t" - "# A[80] * B\n\t" - "ldr r8, [%[a], #320]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #320]\n\t" - "# A[81] * B\n\t" - "ldr r8, [%[a], #324]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #324]\n\t" - "# A[82] * B\n\t" - "ldr r8, [%[a], #328]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #328]\n\t" - "# A[83] * B\n\t" - "ldr r8, [%[a], #332]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #332]\n\t" - "# A[84] * B\n\t" - "ldr r8, [%[a], #336]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #336]\n\t" - "# A[85] * B\n\t" - "ldr r8, [%[a], #340]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #340]\n\t" - "# A[86] * B\n\t" - "ldr r8, [%[a], #344]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - 
"adc r4, r4, r10\n\t" - "str r5, [%[r], #344]\n\t" - "# A[87] * B\n\t" - "ldr r8, [%[a], #348]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #348]\n\t" - "# A[88] * B\n\t" - "ldr r8, [%[a], #352]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #352]\n\t" - "# A[89] * B\n\t" - "ldr r8, [%[a], #356]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #356]\n\t" - "# A[90] * B\n\t" - "ldr r8, [%[a], #360]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #360]\n\t" - "# A[91] * B\n\t" - "ldr r8, [%[a], #364]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #364]\n\t" - "# A[92] * B\n\t" - "ldr r8, [%[a], #368]\n\t" - "mov r4, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "str r5, [%[r], #368]\n\t" - "# A[93] * B\n\t" - "ldr r8, [%[a], #372]\n\t" - "mov r5, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "str r3, [%[r], #372]\n\t" - "# A[94] * B\n\t" - "ldr r8, [%[a], #376]\n\t" - "mov r3, #0\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "str r4, [%[r], #376]\n\t" - "# A[95] * B\n\t" - "ldr r8, [%[a], #380]\n\t" - "umull r6, r7, %[b], r8\n\t" - "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" - "str r5, [%[r], #380]\n\t" - "str r3, [%[r], #384]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" - ); -#endif -} - /* Divide the double width number 
(d1|d0) by the dividend. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -15130,35 +15140,6 @@ static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) return r; } -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. - */ -static void sp_3072_mask_96(sp_digit* r, sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<96; i++) - r[i] = a[i] & m; -#else - int i; - - for (i = 0; i < 96; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } -#endif -} - /* Compare a with b in constant time. * * a A single precision integer. @@ -15974,6 +15955,36 @@ static int32_t sp_3072_cmp_96(sp_digit* a, sp_digit* b) return r; } +#if !defined(NO_DH) || (defined(SP_RSA_PRIVATE_EXP_D) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) +/* AND the mask m into each of the 96 digits of a and store the results in r. + * + * r Output array; r[i] receives a[i] & m for i = 0..95. + * a Input array of 96 digits; only read by this function. + * m Mask to AND against each digit. + */ +static void sp_3072_mask_96(sp_digit* r, sp_digit* a, sp_digit m) +{ +#ifdef WOLFSSL_SP_SMALL + int i; + + for (i=0; i<96; i++) + r[i] = a[i] & m; +#else + int i; + + for (i = 0; i < 96; i += 8) { + r[i+0] = a[i+0] & m; + r[i+1] = a[i+1] & m; + r[i+2] = a[i+2] & m; + r[i+3] = a[i+3] & m; + r[i+4] = a[i+4] & m; + r[i+5] = a[i+5] & m; + r[i+6] = a[i+6] & m; + r[i+7] = a[i+7] & m; + } +#endif +} + /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -16012,6 +16023,8 @@ static WC_INLINE int sp_3072_div_96(sp_digit* a, sp_digit* d, sp_digit* m, return MP_OKAY; } +#endif /* !NO_DH || (SP_RSA_PRIVATE_EXP_D && !WOLFSSL_RSA_PUBLIC_ONLY) */ +#if !defined(NO_DH) || (defined(SP_RSA_PRIVATE_EXP_D) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) /* Reduce a modulo m into r.
(r = a mod m) * * r A single precision number that is the reduced result. @@ -16024,6 +16037,7 @@ static WC_INLINE int sp_3072_mod_96(sp_digit* r, sp_digit* a, sp_digit* m) return sp_3072_div_96(a, m, NULL, r); } +#endif /* !NO_DH || (SP_RSA_PRIVATE_EXP_D && !WOLFSSL_RSA_PUBLIC_ONLY) */ /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -16477,6 +16491,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -16601,6 +16616,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif #endif /* WOLFSSL_HAVE_SP_RSA */ #ifdef WOLFSSL_HAVE_SP_DH /* Convert an array of sp_digit to an mp_int. diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index 42571a2b1..0d38db850 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -1639,7 +1639,8 @@ static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* AND m into each word of a and store in r. * @@ -1847,7 +1848,7 @@ static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -1869,7 +1870,339 @@ static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho) *rho = -x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer; 33 digits are written (r[0]..r[32]). + * a A single precision integer; 32 digits are read (a[0]..a[31]). + * b A single precision digit.
+ */ +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x5, %[b], x8\n\t" + "umulh x3, %[b], x8\n\t" + "mov x4, 0\n\t" + "str x5, [%[r]]\n\t" + "mov x5, 0\n\t" + "mov x9, #8\n\t" + "1:\n\t" + "ldr x8, [%[a], x9]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], x9]\n\t" + "mov x3, x4\n\t" + "mov x4, x5\n\t" + "mov x5, #0\n\t" + "add x9, x9, #8\n\t" + "cmp x9, 256\n\t" + "b.lt 1b\n\t" + "str x3, [%[r], 256]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" + ); +#else + __asm__ __volatile__ ( + "# A[0] * B\n\t" + "ldr x8, [%[a]]\n\t" + "mul x3, %[b], x8\n\t" + "umulh x4, %[b], x8\n\t" + "mov x5, 0\n\t" + "str x3, [%[r]]\n\t" + "# A[1] * B\n\t" + "ldr x8, [%[a], 8]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 8]\n\t" + "# A[2] * B\n\t" + "ldr x8, [%[a], 16]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 16]\n\t" + "# A[3] * B\n\t" + "ldr x8, [%[a], 24]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 24]\n\t" + "# A[4] * B\n\t" + "ldr x8, [%[a], 32]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 32]\n\t" + "# A[5] * B\n\t" + "ldr x8, [%[a], 40]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 40]\n\t" + "#
A[6] * B\n\t" + "ldr x8, [%[a], 48]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 48]\n\t" + "# A[7] * B\n\t" + "ldr x8, [%[a], 56]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 56]\n\t" + "# A[8] * B\n\t" + "ldr x8, [%[a], 64]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 64]\n\t" + "# A[9] * B\n\t" + "ldr x8, [%[a], 72]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 72]\n\t" + "# A[10] * B\n\t" + "ldr x8, [%[a], 80]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 80]\n\t" + "# A[11] * B\n\t" + "ldr x8, [%[a], 88]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 88]\n\t" + "# A[12] * B\n\t" + "ldr x8, [%[a], 96]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 96]\n\t" + "# A[13] * B\n\t" + "ldr x8, [%[a], 104]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 104]\n\t" + "# A[14] * B\n\t" + "ldr x8, [%[a], 112]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 112]\n\t" + "# A[15] * B\n\t" + "ldr x8, [%[a], 120]\n\t" +
"mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 120]\n\t" + "# A[16] * B\n\t" + "ldr x8, [%[a], 128]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 128]\n\t" + "# A[17] * B\n\t" + "ldr x8, [%[a], 136]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 136]\n\t" + "# A[18] * B\n\t" + "ldr x8, [%[a], 144]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 144]\n\t" + "# A[19] * B\n\t" + "ldr x8, [%[a], 152]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 152]\n\t" + "# A[20] * B\n\t" + "ldr x8, [%[a], 160]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 160]\n\t" + "# A[21] * B\n\t" + "ldr x8, [%[a], 168]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 168]\n\t" + "# A[22] * B\n\t" + "ldr x8, [%[a], 176]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 176]\n\t" + "# A[23] * B\n\t" + "ldr x8, [%[a], 184]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 184]\n\t" + "# A[24] * B\n\t" + "ldr x8, [%[a], 192]\n\t" + "mov x5, 0\n\t" + "mul x6,
%[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 192]\n\t" + "# A[25] * B\n\t" + "ldr x8, [%[a], 200]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 200]\n\t" + "# A[26] * B\n\t" + "ldr x8, [%[a], 208]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 208]\n\t" + "# A[27] * B\n\t" + "ldr x8, [%[a], 216]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 216]\n\t" + "# A[28] * B\n\t" + "ldr x8, [%[a], 224]\n\t" + "mov x3, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adcs x5, x5, x7\n\t" + "adc x3, xzr, xzr\n\t" + "str x4, [%[r], 224]\n\t" + "# A[29] * B\n\t" + "ldr x8, [%[a], 232]\n\t" + "mov x4, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x5, x5, x6\n\t" + "adcs x3, x3, x7\n\t" + "adc x4, xzr, xzr\n\t" + "str x5, [%[r], 232]\n\t" + "# A[30] * B\n\t" + "ldr x8, [%[a], 240]\n\t" + "mov x5, 0\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x3, x3, x6\n\t" + "adcs x4, x4, x7\n\t" + "adc x5, xzr, xzr\n\t" + "str x3, [%[r], 240]\n\t" + "# A[31] * B\n\t" + "ldr x8, [%[a], 248]\n\t" + "mul x6, %[b], x8\n\t" + "umulh x7, %[b], x8\n\t" + "adds x4, x4, x6\n\t" + "adc x5, x5, x7\n\t" + "str x4, [%[r], 248]\n\t" + "str x5, [%[r], 256]\n\t" + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "cc" + ); +#endif +} + +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 2048 bits, just need to subtract.
* @@ -2969,7 +3302,7 @@ static int sp_2048_mod_exp_16(sp_digit* r, sp_digit* a, sp_digit* e, } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ #ifdef WOLFSSL_HAVE_SP_DH /* r = 2^n mod m where n is the number of bits to reduce by. @@ -3565,337 +3898,6 @@ static void sp_2048_mont_sqr_32(sp_digit* r, sp_digit* a, sp_digit* m, sp_2048_mont_reduce_32(r, m, mp); } -/* Mul a by digit b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision digit. - */ -static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ -#ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "# A[0] * B\n\t" - "ldr x8, [%[a]]\n\t" - "mul x5, %[b], x8\n\t" - "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" - "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" - "mov x9, #8\n\t" - "1:\n\t" - "ldr x8, [%[a], x9]\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], x9]\n\t" - "mov x3, x4\n\t" - "mov x4, x5\n\t" - "mov x5, #0\n\t" - "add x9, x9, #8\n\t" - "cmp x9, 256\n\t" - "b.lt 1b\n\t" - "str x3, [%[r], 256]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8" - ); -#else - __asm__ __volatile__ ( - "# A[0] * B\n\t" - "ldr x8, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" - "str x3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr x8, [%[a], 8]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 8]\n\t" - "# A[2] * B\n\t" - "ldr x8, [%[a], 16]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 16]\n\t" - "# A[3] * B\n\t" - 
"ldr x8, [%[a], 24]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 24]\n\t" - "# A[4] * B\n\t" - "ldr x8, [%[a], 32]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 32]\n\t" - "# A[5] * B\n\t" - "ldr x8, [%[a], 40]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 40]\n\t" - "# A[6] * B\n\t" - "ldr x8, [%[a], 48]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 48]\n\t" - "# A[7] * B\n\t" - "ldr x8, [%[a], 56]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 56]\n\t" - "# A[8] * B\n\t" - "ldr x8, [%[a], 64]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 64]\n\t" - "# A[9] * B\n\t" - "ldr x8, [%[a], 72]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 72]\n\t" - "# A[10] * B\n\t" - "ldr x8, [%[a], 80]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 80]\n\t" - "# A[11] * B\n\t" - "ldr x8, [%[a], 88]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 88]\n\t" - "# A[12] * B\n\t" - "ldr x8, [%[a], 96]\n\t" - "mov x5, 0\n\t" - "mul 
x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 96]\n\t" - "# A[13] * B\n\t" - "ldr x8, [%[a], 104]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 104]\n\t" - "# A[14] * B\n\t" - "ldr x8, [%[a], 112]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 112]\n\t" - "# A[15] * B\n\t" - "ldr x8, [%[a], 120]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 120]\n\t" - "# A[16] * B\n\t" - "ldr x8, [%[a], 128]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 128]\n\t" - "# A[17] * B\n\t" - "ldr x8, [%[a], 136]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 136]\n\t" - "# A[18] * B\n\t" - "ldr x8, [%[a], 144]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 144]\n\t" - "# A[19] * B\n\t" - "ldr x8, [%[a], 152]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 152]\n\t" - "# A[20] * B\n\t" - "ldr x8, [%[a], 160]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 160]\n\t" - "# A[21] * B\n\t" - "ldr x8, [%[a], 168]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh 
x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 168]\n\t" - "# A[22] * B\n\t" - "ldr x8, [%[a], 176]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 176]\n\t" - "# A[23] * B\n\t" - "ldr x8, [%[a], 184]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 184]\n\t" - "# A[24] * B\n\t" - "ldr x8, [%[a], 192]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 192]\n\t" - "# A[25] * B\n\t" - "ldr x8, [%[a], 200]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 200]\n\t" - "# A[26] * B\n\t" - "ldr x8, [%[a], 208]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 208]\n\t" - "# A[27] * B\n\t" - "ldr x8, [%[a], 216]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 216]\n\t" - "# A[28] * B\n\t" - "ldr x8, [%[a], 224]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 224]\n\t" - "# A[29] * B\n\t" - "ldr x8, [%[a], 232]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 232]\n\t" - "# A[30] * B\n\t" - "ldr x8, [%[a], 240]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds 
x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 240]\n\t" - "# A[31] * B\n\t" - "ldr x8, [%[a], 248]\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adc x5, x5, x7\n\t" - "str x4, [%[r], 248]\n\t" - "str x5, [%[r], 256]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8" - ); -#endif -} - /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -7862,7 +7864,8 @@ static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* AND m into each word of a and store in r. * @@ -8070,7 +8073,7 @@ static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -8092,7 +8095,483 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) *rho = -x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */
+static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL /* looped variant: one multiply-accumulate step per digit */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr x8, [%[a]]\n\t"
+        "mul x5, %[b], x8\n\t"
+        "umulh x3, %[b], x8\n\t"
+        "mov x4, 0\n\t"
+        "str x5, [%[r]]\n\t"
+        "mov x5, 0\n\t"
+        "mov x9, #8\n\t" /* x9 = byte offset of the digit being processed */
+        "1:\n\t"
+        "ldr x8, [%[a], x9]\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], x9]\n\t"
+        "mov x3, x4\n\t" /* shift the accumulator down one word for the next digit */
+        "mov x4, x5\n\t"
+        "mov x5, #0\n\t"
+        "add x9, x9, #8\n\t"
+        "cmp x9, 384\n\t" /* 384 = 48 digits * 8 bytes */
+        "b.lt 1b\n\t"
+        "str x3, [%[r], 384]\n\t" /* remaining carry word is stored past the 48 product digits */
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "cc" /* x9 is the loop offset; cc because adds/adcs/cmp write the flags */
+    );
+#else /* fully unrolled variant: x3/x4/x5 rotate as low/mid/high accumulator words */
+    __asm__ __volatile__ (
+        "# A[0] * B\n\t"
+        "ldr x8, [%[a]]\n\t"
+        "mul x3, %[b], x8\n\t"
+        "umulh x4, %[b], x8\n\t"
+        "mov x5, 0\n\t"
+        "str x3, [%[r]]\n\t"
+        "# A[1] * B\n\t"
+        "ldr x8, [%[a], 8]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 8]\n\t"
+        "# A[2] * B\n\t"
+        "ldr x8, [%[a], 16]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 16]\n\t"
+        "# A[3] * B\n\t"
+        "ldr x8, [%[a], 24]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 24]\n\t"
+        "# A[4] * B\n\t"
+        "ldr x8, [%[a], 32]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 32]\n\t"
+        "# A[5] * B\n\t"
+        "ldr x8, [%[a], 40]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 40]\n\t"
+        "# A[6] * B\n\t"
+        "ldr x8, [%[a], 48]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 48]\n\t"
+        "# A[7] * B\n\t"
+        "ldr x8, [%[a], 56]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 56]\n\t"
+        "# A[8] * B\n\t"
+        "ldr x8, [%[a], 64]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 64]\n\t"
+        "# A[9] * B\n\t"
+        "ldr x8, [%[a], 72]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 72]\n\t"
+        "# A[10] * B\n\t"
+        "ldr x8, [%[a], 80]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 80]\n\t"
+        "# A[11] * B\n\t"
+        "ldr x8, [%[a], 88]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 88]\n\t"
+        "# A[12] * B\n\t"
+        "ldr x8, [%[a], 96]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 96]\n\t"
+        "# A[13] * B\n\t"
+        "ldr x8, [%[a], 104]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 104]\n\t"
+        "# A[14] * B\n\t"
+        "ldr x8, [%[a], 112]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 112]\n\t"
+        "# A[15] * B\n\t"
+        "ldr x8, [%[a], 120]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 120]\n\t"
+        "# A[16] * B\n\t"
+        "ldr x8, [%[a], 128]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 128]\n\t"
+        "# A[17] * B\n\t"
+        "ldr x8, [%[a], 136]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 136]\n\t"
+        "# A[18] * B\n\t"
+        "ldr x8, [%[a], 144]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 144]\n\t"
+        "# A[19] * B\n\t"
+        "ldr x8, [%[a], 152]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 152]\n\t"
+        "# A[20] * B\n\t"
+        "ldr x8, [%[a], 160]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 160]\n\t"
+        "# A[21] * B\n\t"
+        "ldr x8, [%[a], 168]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 168]\n\t"
+        "# A[22] * B\n\t"
+        "ldr x8, [%[a], 176]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 176]\n\t"
+        "# A[23] * B\n\t"
+        "ldr x8, [%[a], 184]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 184]\n\t"
+        "# A[24] * B\n\t"
+        "ldr x8, [%[a], 192]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 192]\n\t"
+        "# A[25] * B\n\t"
+        "ldr x8, [%[a], 200]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 200]\n\t"
+        "# A[26] * B\n\t"
+        "ldr x8, [%[a], 208]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 208]\n\t"
+        "# A[27] * B\n\t"
+        "ldr x8, [%[a], 216]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 216]\n\t"
+        "# A[28] * B\n\t"
+        "ldr x8, [%[a], 224]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 224]\n\t"
+        "# A[29] * B\n\t"
+        "ldr x8, [%[a], 232]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 232]\n\t"
+        "# A[30] * B\n\t"
+        "ldr x8, [%[a], 240]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 240]\n\t"
+        "# A[31] * B\n\t"
+        "ldr x8, [%[a], 248]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 248]\n\t"
+        "# A[32] * B\n\t"
+        "ldr x8, [%[a], 256]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 256]\n\t"
+        "# A[33] * B\n\t"
+        "ldr x8, [%[a], 264]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 264]\n\t"
+        "# A[34] * B\n\t"
+        "ldr x8, [%[a], 272]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 272]\n\t"
+        "# A[35] * B\n\t"
+        "ldr x8, [%[a], 280]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 280]\n\t"
+        "# A[36] * B\n\t"
+        "ldr x8, [%[a], 288]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 288]\n\t"
+        "# A[37] * B\n\t"
+        "ldr x8, [%[a], 296]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 296]\n\t"
+        "# A[38] * B\n\t"
+        "ldr x8, [%[a], 304]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 304]\n\t"
+        "# A[39] * B\n\t"
+        "ldr x8, [%[a], 312]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 312]\n\t"
+        "# A[40] * B\n\t"
+        "ldr x8, [%[a], 320]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 320]\n\t"
+        "# A[41] * B\n\t"
+        "ldr x8, [%[a], 328]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 328]\n\t"
+        "# A[42] * B\n\t"
+        "ldr x8, [%[a], 336]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 336]\n\t"
+        "# A[43] * B\n\t"
+        "ldr x8, [%[a], 344]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 344]\n\t"
+        "# A[44] * B\n\t"
+        "ldr x8, [%[a], 352]\n\t"
+        "mov x4, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adcs x3, x3, x7\n\t"
+        "adc x4, xzr, xzr\n\t"
+        "str x5, [%[r], 352]\n\t"
+        "# A[45] * B\n\t"
+        "ldr x8, [%[a], 360]\n\t"
+        "mov x5, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x3, x3, x6\n\t"
+        "adcs x4, x4, x7\n\t"
+        "adc x5, xzr, xzr\n\t"
+        "str x3, [%[r], 360]\n\t"
+        "# A[46] * B\n\t"
+        "ldr x8, [%[a], 368]\n\t"
+        "mov x3, 0\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x4, x4, x6\n\t"
+        "adcs x5, x5, x7\n\t"
+        "adc x3, xzr, xzr\n\t"
+        "str x4, [%[r], 368]\n\t"
+        "# A[47] * B\n\t"
+        "ldr x8, [%[a], 376]\n\t"
+        "mul x6, %[b], x8\n\t"
+        "umulh x7, %[b], x8\n\t"
+        "adds x5, x5, x6\n\t"
+        "adc x3, x3, x7\n\t" /* last digit: no third accumulator word is needed */
+        "str x5, [%[r], 376]\n\t"
+        "str x3, [%[r], 384]\n\t" /* remaining carry word is stored past the 48 product digits */
+        :
+        : [r] "r" (r), [a] "r" (a), [b] "r" (b)
+        : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "cc" /* cc because adds/adcs write the flags */
+    );
+#endif
+}
+
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+    !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 3072 bits, just need to subtract.
  *
@@ -9448,7 +9927,7 @@ static int sp_3072_mod_exp_24(sp_digit* r, sp_digit* a, sp_digit* e,
 }
 #endif /* WOLFSSL_SP_SMALL */
 
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */
 
 #ifdef WOLFSSL_HAVE_SP_DH
 /* r = 2^n mod m where n is the number of bits to reduce by.
@@ -10284,481 +10763,6 @@ static void sp_3072_mont_sqr_48(sp_digit* r, sp_digit* a, sp_digit* m, sp_3072_mont_reduce_48(r, m, mp); } -/* Mul a by digit b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision digit. - */ -static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ -#ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "# A[0] * B\n\t" - "ldr x8, [%[a]]\n\t" - "mul x5, %[b], x8\n\t" - "umulh x3, %[b], x8\n\t" - "mov x4, 0\n\t" - "str x5, [%[r]]\n\t" - "mov x5, 0\n\t" - "mov x9, #8\n\t" - "1:\n\t" - "ldr x8, [%[a], x9]\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], x9]\n\t" - "mov x3, x4\n\t" - "mov x4, x5\n\t" - "mov x5, #0\n\t" - "add x9, x9, #8\n\t" - "cmp x9, 384\n\t" - "b.lt 1b\n\t" - "str x3, [%[r], 384]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8" - ); -#else - __asm__ __volatile__ ( - "# A[0] * B\n\t" - "ldr x8, [%[a]]\n\t" - "mul x3, %[b], x8\n\t" - "umulh x4, %[b], x8\n\t" - "mov x5, 0\n\t" - "str x3, [%[r]]\n\t" - "# A[1] * B\n\t" - "ldr x8, [%[a], 8]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 8]\n\t" - "# A[2] * B\n\t" - "ldr x8, [%[a], 16]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 16]\n\t" - "# A[3] * B\n\t" - "ldr x8, [%[a], 24]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 24]\n\t" - "# A[4] * B\n\t" - "ldr x8, [%[a], 32]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, 
x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 32]\n\t" - "# A[5] * B\n\t" - "ldr x8, [%[a], 40]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 40]\n\t" - "# A[6] * B\n\t" - "ldr x8, [%[a], 48]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 48]\n\t" - "# A[7] * B\n\t" - "ldr x8, [%[a], 56]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 56]\n\t" - "# A[8] * B\n\t" - "ldr x8, [%[a], 64]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 64]\n\t" - "# A[9] * B\n\t" - "ldr x8, [%[a], 72]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 72]\n\t" - "# A[10] * B\n\t" - "ldr x8, [%[a], 80]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 80]\n\t" - "# A[11] * B\n\t" - "ldr x8, [%[a], 88]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 88]\n\t" - "# A[12] * B\n\t" - "ldr x8, [%[a], 96]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 96]\n\t" - "# A[13] * B\n\t" - "ldr x8, [%[a], 104]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 
104]\n\t" - "# A[14] * B\n\t" - "ldr x8, [%[a], 112]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 112]\n\t" - "# A[15] * B\n\t" - "ldr x8, [%[a], 120]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 120]\n\t" - "# A[16] * B\n\t" - "ldr x8, [%[a], 128]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 128]\n\t" - "# A[17] * B\n\t" - "ldr x8, [%[a], 136]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 136]\n\t" - "# A[18] * B\n\t" - "ldr x8, [%[a], 144]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 144]\n\t" - "# A[19] * B\n\t" - "ldr x8, [%[a], 152]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 152]\n\t" - "# A[20] * B\n\t" - "ldr x8, [%[a], 160]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 160]\n\t" - "# A[21] * B\n\t" - "ldr x8, [%[a], 168]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 168]\n\t" - "# A[22] * B\n\t" - "ldr x8, [%[a], 176]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 176]\n\t" - "# A[23] * 
B\n\t" - "ldr x8, [%[a], 184]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 184]\n\t" - "# A[24] * B\n\t" - "ldr x8, [%[a], 192]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 192]\n\t" - "# A[25] * B\n\t" - "ldr x8, [%[a], 200]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 200]\n\t" - "# A[26] * B\n\t" - "ldr x8, [%[a], 208]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 208]\n\t" - "# A[27] * B\n\t" - "ldr x8, [%[a], 216]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 216]\n\t" - "# A[28] * B\n\t" - "ldr x8, [%[a], 224]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 224]\n\t" - "# A[29] * B\n\t" - "ldr x8, [%[a], 232]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 232]\n\t" - "# A[30] * B\n\t" - "ldr x8, [%[a], 240]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 240]\n\t" - "# A[31] * B\n\t" - "ldr x8, [%[a], 248]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 248]\n\t" - "# A[32] * B\n\t" - "ldr x8, [%[a], 
256]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 256]\n\t" - "# A[33] * B\n\t" - "ldr x8, [%[a], 264]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 264]\n\t" - "# A[34] * B\n\t" - "ldr x8, [%[a], 272]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 272]\n\t" - "# A[35] * B\n\t" - "ldr x8, [%[a], 280]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 280]\n\t" - "# A[36] * B\n\t" - "ldr x8, [%[a], 288]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 288]\n\t" - "# A[37] * B\n\t" - "ldr x8, [%[a], 296]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 296]\n\t" - "# A[38] * B\n\t" - "ldr x8, [%[a], 304]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 304]\n\t" - "# A[39] * B\n\t" - "ldr x8, [%[a], 312]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 312]\n\t" - "# A[40] * B\n\t" - "ldr x8, [%[a], 320]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 320]\n\t" - "# A[41] * B\n\t" - "ldr x8, [%[a], 328]\n\t" - "mov x4, 0\n\t" 
- "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 328]\n\t" - "# A[42] * B\n\t" - "ldr x8, [%[a], 336]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 336]\n\t" - "# A[43] * B\n\t" - "ldr x8, [%[a], 344]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 344]\n\t" - "# A[44] * B\n\t" - "ldr x8, [%[a], 352]\n\t" - "mov x4, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adcs x3, x3, x7\n\t" - "adc x4, xzr, xzr\n\t" - "str x5, [%[r], 352]\n\t" - "# A[45] * B\n\t" - "ldr x8, [%[a], 360]\n\t" - "mov x5, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x3, x3, x6\n\t" - "adcs x4, x4, x7\n\t" - "adc x5, xzr, xzr\n\t" - "str x3, [%[r], 360]\n\t" - "# A[46] * B\n\t" - "ldr x8, [%[a], 368]\n\t" - "mov x3, 0\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x4, x4, x6\n\t" - "adcs x5, x5, x7\n\t" - "adc x3, xzr, xzr\n\t" - "str x4, [%[r], 368]\n\t" - "# A[47] * B\n\t" - "ldr x8, [%[a], 376]\n\t" - "mul x6, %[b], x8\n\t" - "umulh x7, %[b], x8\n\t" - "adds x5, x5, x6\n\t" - "adc x3, x3, x7\n\t" - "str x5, [%[r], 376]\n\t" - "str x3, [%[r], 384]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8" - ); -#endif -} - /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) * * d1 The high order half of the number to divide. 
diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index b3e6e0284..b9afc69e2 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -2303,7 +2303,8 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* AND m into each word of a and store in r. * @@ -2738,7 +2739,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -2759,7 +2760,84 @@ static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho) *rho = -x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */
+SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t" /* r6 = 256 = 64 digits * 4 bytes */
+        "add r6, %[a]\n\t"
+        "mov r8, %[r]\n\t" /* park the output pointer; %[r] doubles as a zero register below */
+        "mov r9, r6\n\t" /* r9 = end-of-input address used by the loop compare */
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "# A[] * B\n\t"
+#ifdef WOLFSSL_SP_ARM_THUMB_ASM_CORTEX_M /* single 32x32->64 multiply */
+        "ldr r6, [%[a]]\n\t"
+        "umull r6, r7, r6, %[b]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+#else /* product assembled from four 16x16 partial products */
+        "ldr r6, [%[a]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+#endif
+        "# A[] * B - Done\n\t"
+        "mov %[r], r8\n\t" /* restore the output pointer */
+        "str r3, [%[r]]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add %[r], #4\n\t"
+        "add %[a], #4\n\t"
+        "mov r8, %[r]\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t" /* remaining carry word follows the last product digit */
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" /* cc: add/adc/cmp update the condition flags */
+    );
+}
+
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+    !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
* @@ -3598,7 +3676,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e, } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ #ifdef WOLFSSL_HAVE_SP_DH /* r = 2^n mod m where n is the number of bits to reduce by. @@ -3843,82 +3921,6 @@ static void sp_2048_mont_sqr_64(sp_digit* r, sp_digit* a, sp_digit* m, sp_2048_mont_reduce_64(r, m, mp); } -/* Mul a by digit b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision digit. - */ -SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ - __asm__ __volatile__ ( - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, %[a]\n\t" - "mov r8, %[r]\n\t" - "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "# A[] * B\n\t" -#ifdef WOLFSSL_SP_ARM_THUMB_ASM_CORTEX_M - "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, %[b]\n\t" - "add r3, r6\n\t" - "adc r4, r7\n\t" - "adc r5, %[r]\n\t" -#else - "ldr r6, [%[a]]\n\t" - "lsl r6, r6, #16\n\t" - "lsl r7, %[b], #16\n\t" - "lsr r6, r6, #16\n\t" - "lsr r7, r7, #16\n\t" - "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" - "lsr r7, %[b], #16\n\t" - "mul r6, r7\n\t" - "lsr r7, r6, #16\n\t" - "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" - "adc r4, r7\n\t" - "adc r5, %[r]\n\t" - "ldr r6, [%[a]]\n\t" - "lsr r6, r6, #16\n\t" - "lsr r7, %[b], #16\n\t" - "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" - "lsl r7, %[b], #16\n\t" - "lsr r7, r7, #16\n\t" - "mul r6, r7\n\t" - "lsr r7, r6, #16\n\t" - "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" - "adc r4, r7\n\t" - "adc r5, %[r]\n\t" -#endif - "# A[] * B - Done\n\t" - "mov %[r], r8\n\t" - "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" - "mov r8, %[r]\n\t" - "cmp %[a], r9\n\t" - "blt 
1b\n\t" - "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" - ); -} - /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -7679,7 +7681,8 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* AND m into each word of a and store in r. * @@ -8084,7 +8087,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -8105,7 +8108,85 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) *rho = -x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. 
+ */
+SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov r6, #1\n\t"
+        "lsl r6, r6, #8\n\t"
+        "add r6, #128\n\t" /* r6 = 256 + 128 = 384 = 96 digits * 4 bytes */
+        "add r6, %[a]\n\t"
+        "mov r8, %[r]\n\t" /* park the output pointer; %[r] doubles as a zero register below */
+        "mov r9, r6\n\t" /* r9 = end-of-input address used by the loop compare */
+        "mov r3, #0\n\t"
+        "mov r4, #0\n\t"
+        "1:\n\t"
+        "mov %[r], #0\n\t"
+        "mov r5, #0\n\t"
+        "# A[] * B\n\t"
+#ifdef WOLFSSL_SP_ARM_THUMB_ASM_CORTEX_M /* single 32x32->64 multiply */
+        "ldr r6, [%[a]]\n\t"
+        "umull r6, r7, r6, %[b]\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+#else /* product assembled from four 16x16 partial products */
+        "ldr r6, [%[a]]\n\t"
+        "lsl r6, r6, #16\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r7, r6\n\t"
+        "add r3, r7\n\t"
+        "adc r4, %[r]\n\t"
+        "adc r5, %[r]\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "ldr r6, [%[a]]\n\t"
+        "lsr r6, r6, #16\n\t"
+        "lsr r7, %[b], #16\n\t"
+        "mul r7, r6\n\t"
+        "add r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+        "lsl r7, %[b], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "mul r6, r7\n\t"
+        "lsr r7, r6, #16\n\t"
+        "lsl r6, r6, #16\n\t"
+        "add r3, r6\n\t"
+        "adc r4, r7\n\t"
+        "adc r5, %[r]\n\t"
+#endif
+        "# A[] * B - Done\n\t"
+        "mov %[r], r8\n\t" /* restore the output pointer */
+        "str r3, [%[r]]\n\t"
+        "mov r3, r4\n\t"
+        "mov r4, r5\n\t"
+        "add %[r], #4\n\t"
+        "add %[a], #4\n\t"
+        "mov r8, %[r]\n\t"
+        "cmp %[a], r9\n\t"
+        "blt 1b\n\t"
+        "str r3, [%[r]]\n\t" /* remaining carry word follows the last product digit */
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" /* cc: add/adc/cmp update the condition flags */
+    );
+}
+
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+    !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* Sub b from a into a.
(a -= b) * @@ -9201,7 +9282,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e, } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ #ifdef WOLFSSL_HAVE_SP_DH /* r = 2^n mod m where n is the number of bits to reduce by. @@ -9450,83 +9531,6 @@ static void sp_3072_mont_sqr_96(sp_digit* r, sp_digit* a, sp_digit* m, sp_3072_mont_reduce_96(r, m, mp); } -/* Mul a by digit b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision digit. - */ -SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ - __asm__ __volatile__ ( - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add r6, %[a]\n\t" - "mov r8, %[r]\n\t" - "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "# A[] * B\n\t" -#ifdef WOLFSSL_SP_ARM_THUMB_ASM_CORTEX_M - "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, %[b]\n\t" - "add r3, r6\n\t" - "adc r4, r7\n\t" - "adc r5, %[r]\n\t" -#else - "ldr r6, [%[a]]\n\t" - "lsl r6, r6, #16\n\t" - "lsl r7, %[b], #16\n\t" - "lsr r6, r6, #16\n\t" - "lsr r7, r7, #16\n\t" - "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" - "lsr r7, %[b], #16\n\t" - "mul r6, r7\n\t" - "lsr r7, r6, #16\n\t" - "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" - "adc r4, r7\n\t" - "adc r5, %[r]\n\t" - "ldr r6, [%[a]]\n\t" - "lsr r6, r6, #16\n\t" - "lsr r7, %[b], #16\n\t" - "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" - "lsl r7, %[b], #16\n\t" - "lsr r7, r7, #16\n\t" - "mul r6, r7\n\t" - "lsr r7, r6, #16\n\t" - "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" - "adc r4, r7\n\t" - "adc r5, %[r]\n\t" -#endif - "# A[] * B - Done\n\t" - "mov %[r], r8\n\t" - "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" - "mov r8, 
%[r]\n\t" - "cmp %[a], r9\n\t" - "blt 1b\n\t" - "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" - ); -} - /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) * * d1 The high order half of the number to divide. diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 0ce21e4e4..37f023068 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -50,7 +50,7 @@ #ifndef WOLFSSL_SP_ASM #if SP_WORD_SIZE == 32 -#if defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL) +#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* Mask for address to obfuscate which of the two address will be used. */ static const size_t addr_mask[2] = { 0, (size_t)-1 }; #endif @@ -1032,7 +1032,8 @@ SP_NOINLINE static void sp_2048_sqr_90(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) * @@ -1168,7 +1169,7 @@ SP_NOINLINE static void sp_2048_sqr_45(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -1190,7 +1191,58 @@ static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho) *rho = (1L << 23) - x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */
+SP_NOINLINE static void sp_2048_mul_d_90(sp_digit* r, const sp_digit* a,
+    const sp_digit b)
+{
+#ifdef WOLFSSL_SP_SMALL /* compact loop: running carry, every digit masked to 23 bits */
+    int64_t tb = b; /* widen b once so each product is formed in 64 bits */
+    int64_t t = 0; /* running sum: current product plus carry from the previous digit */
+    int i;
+
+    for (i = 0; i < 90; i++) {
+        t += tb * a[i];
+        r[i] = t & 0x7fffff; /* low 23 bits become this output digit */
+        t >>= 23; /* the rest carries into the next digit */
+    }
+    r[90] = (sp_digit)t; /* final carry is written as the extra top digit */
+#else /* unrolled: eight digits per loop pass, no carry chain between digits */
+    int64_t tb = b; /* widen b once so each product is formed in 64 bits */
+    int64_t t[8]; /* rotating window of the most recent products */
+    int i;
+
+    t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff;
+    for (i = 0; i < 88; i += 8) { /* covers digits 1..88 */
+        t[1] = tb * a[i+1];
+        r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); /* high part of previous product + low 23 bits of this one; the sum is stored unmasked */
+        t[2] = tb * a[i+2];
+        r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff);
+        t[3] = tb * a[i+3];
+        r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff);
+        t[4] = tb * a[i+4];
+        r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff);
+        t[5] = tb * a[i+5];
+        r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff);
+        t[6] = tb * a[i+6];
+        r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff);
+        t[7] = tb * a[i+7];
+        r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff);
+        t[0] = tb * a[i+8]; /* wraps back to slot 0 so the next pass can read it */
+        r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff);
+    }
+    t[1] = tb * a[89]; /* digit 89 is handled after the loop */
+    r[89] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff);
+    r[90] = (sp_digit)(t[1] >> 23); /* high part of the last product is the extra top digit */
+#endif /* WOLFSSL_SP_SMALL */
+}
+
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+    !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
  * Given m must be 2048 bits, just need to subtract.
  *
@@ -1556,56 +1608,6 @@ SP_NOINLINE static void sp_2048_mul_d_45(sp_digit* r, const sp_digit* a,
 #endif /* WOLFSSL_SP_SMALL */
 }
 
-/* Multiply a by scalar b into r. (r = a * b)
- *
- * r A single precision integer.
- * a A single precision integer.
- * b A scalar.
- */ -SP_NOINLINE static void sp_2048_mul_d_90(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ -#ifdef WOLFSSL_SP_SMALL - int64_t tb = b; - int64_t t = 0; - int i; - - for (i = 0; i < 90; i++) { - t += tb * a[i]; - r[i] = t & 0x7fffff; - t >>= 23; - } - r[90] = (sp_digit)t; -#else - int64_t tb = b; - int64_t t[8]; - int i; - - t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; - for (i = 0; i < 88; i += 8) { - t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); - t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); - t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); - t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); - t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); - t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); - t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); - } - t[1] = tb * a[89]; - r[89] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); - r[90] = (sp_digit)(t[1] >> 23); -#endif /* WOLFSSL_SP_SMALL */ -} - /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -1663,52 +1665,22 @@ SP_NOINLINE static int sp_2048_add_45(sp_digit* r, const sp_digit* a, #endif SP_NOINLINE static void sp_2048_rshift_45(sp_digit* r, sp_digit* a, byte n) { -#ifdef WOLFSSL_SP_SMALL int i; +#ifdef WOLFSSL_SP_SMALL for (i=0; i<44; i++) r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff; #else - r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff; - r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff; - r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff; - r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff; - r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff; - r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff; - r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff; - r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff; - r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff; - r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff; - r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff; - r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff; - r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff; - r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff; - r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff; - r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff; - r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff; - r[17] = ((a[17] >> n) | (a[18] << (23 - n))) & 0x7fffff; - r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff; - r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff; - r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff; - r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff; - r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff; - r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff; - r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff; - r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff; - r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff; - r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff; - r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 
0x7fffff; - r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff; - r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff; - r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff; - r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff; - r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff; - r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff; - r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff; - r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff; - r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff; - r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff; - r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff; + for (i=0; i<40; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff; + } r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff; r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff; r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff; @@ -2121,7 +2093,7 @@ static int sp_2048_mod_exp_45(sp_digit* r, sp_digit* a, sp_digit* e, int bits, #endif } -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 2048 bits, just need to subtract. 
@@ -2366,6 +2338,7 @@ static void sp_2048_mont_reduce_90(sp_digit* a, sp_digit* m, sp_digit mp) int i; sp_digit mu; +#ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<89; i++) { mu = (a[i] * mp) & 0x7fffff; @@ -2388,6 +2361,17 @@ static void sp_2048_mont_reduce_90(sp_digit* a, sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 23; a[i] &= 0x7fffff; } +#else + for (i=0; i<89; i++) { + mu = (a[i] * mp) & 0x7fffff; + sp_2048_mul_add_90(a+i, m, mu); + a[i+1] += a[i] >> 23; + } + mu = (a[i] * mp) & 0x1l; + sp_2048_mul_add_90(a+i, m, mu); + a[i+1] += a[i] >> 23; + a[i] &= 0x7fffff; +#endif sp_2048_mont_shift_90(a, a); sp_2048_cond_sub_90(a, a, m, 0 - ((a[89] >> 1) > 0)); @@ -2551,100 +2535,22 @@ SP_NOINLINE static int sp_2048_add_90(sp_digit* r, const sp_digit* a, #endif SP_NOINLINE static void sp_2048_rshift_90(sp_digit* r, sp_digit* a, byte n) { -#ifdef WOLFSSL_SP_SMALL int i; +#ifdef WOLFSSL_SP_SMALL for (i=0; i<89; i++) r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff; #else - r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff; - r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff; - r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff; - r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff; - r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff; - r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff; - r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff; - r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff; - r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff; - r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff; - r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff; - r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff; - r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff; - r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff; - r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff; - r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff; - r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff; - r[17] = ((a[17] >> n) | (a[18] << 
(23 - n))) & 0x7fffff; - r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff; - r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff; - r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff; - r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff; - r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff; - r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff; - r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff; - r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff; - r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff; - r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff; - r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 0x7fffff; - r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff; - r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff; - r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff; - r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff; - r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff; - r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff; - r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff; - r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff; - r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff; - r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff; - r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff; - r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff; - r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff; - r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff; - r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff; - r[44] = ((a[44] >> n) | (a[45] << (23 - n))) & 0x7fffff; - r[45] = ((a[45] >> n) | (a[46] << (23 - n))) & 0x7fffff; - r[46] = ((a[46] >> n) | (a[47] << (23 - n))) & 0x7fffff; - r[47] = ((a[47] >> n) | (a[48] << (23 - n))) & 0x7fffff; - r[48] = ((a[48] >> n) | (a[49] << (23 - n))) & 0x7fffff; - r[49] = ((a[49] >> n) | (a[50] << (23 - n))) & 0x7fffff; - r[50] = ((a[50] >> n) | (a[51] << (23 - n))) & 0x7fffff; - r[51] = ((a[51] >> n) | 
(a[52] << (23 - n))) & 0x7fffff; - r[52] = ((a[52] >> n) | (a[53] << (23 - n))) & 0x7fffff; - r[53] = ((a[53] >> n) | (a[54] << (23 - n))) & 0x7fffff; - r[54] = ((a[54] >> n) | (a[55] << (23 - n))) & 0x7fffff; - r[55] = ((a[55] >> n) | (a[56] << (23 - n))) & 0x7fffff; - r[56] = ((a[56] >> n) | (a[57] << (23 - n))) & 0x7fffff; - r[57] = ((a[57] >> n) | (a[58] << (23 - n))) & 0x7fffff; - r[58] = ((a[58] >> n) | (a[59] << (23 - n))) & 0x7fffff; - r[59] = ((a[59] >> n) | (a[60] << (23 - n))) & 0x7fffff; - r[60] = ((a[60] >> n) | (a[61] << (23 - n))) & 0x7fffff; - r[61] = ((a[61] >> n) | (a[62] << (23 - n))) & 0x7fffff; - r[62] = ((a[62] >> n) | (a[63] << (23 - n))) & 0x7fffff; - r[63] = ((a[63] >> n) | (a[64] << (23 - n))) & 0x7fffff; - r[64] = ((a[64] >> n) | (a[65] << (23 - n))) & 0x7fffff; - r[65] = ((a[65] >> n) | (a[66] << (23 - n))) & 0x7fffff; - r[66] = ((a[66] >> n) | (a[67] << (23 - n))) & 0x7fffff; - r[67] = ((a[67] >> n) | (a[68] << (23 - n))) & 0x7fffff; - r[68] = ((a[68] >> n) | (a[69] << (23 - n))) & 0x7fffff; - r[69] = ((a[69] >> n) | (a[70] << (23 - n))) & 0x7fffff; - r[70] = ((a[70] >> n) | (a[71] << (23 - n))) & 0x7fffff; - r[71] = ((a[71] >> n) | (a[72] << (23 - n))) & 0x7fffff; - r[72] = ((a[72] >> n) | (a[73] << (23 - n))) & 0x7fffff; - r[73] = ((a[73] >> n) | (a[74] << (23 - n))) & 0x7fffff; - r[74] = ((a[74] >> n) | (a[75] << (23 - n))) & 0x7fffff; - r[75] = ((a[75] >> n) | (a[76] << (23 - n))) & 0x7fffff; - r[76] = ((a[76] >> n) | (a[77] << (23 - n))) & 0x7fffff; - r[77] = ((a[77] >> n) | (a[78] << (23 - n))) & 0x7fffff; - r[78] = ((a[78] >> n) | (a[79] << (23 - n))) & 0x7fffff; - r[79] = ((a[79] >> n) | (a[80] << (23 - n))) & 0x7fffff; - r[80] = ((a[80] >> n) | (a[81] << (23 - n))) & 0x7fffff; - r[81] = ((a[81] >> n) | (a[82] << (23 - n))) & 0x7fffff; - r[82] = ((a[82] >> n) | (a[83] << (23 - n))) & 0x7fffff; - r[83] = ((a[83] >> n) | (a[84] << (23 - n))) & 0x7fffff; - r[84] = ((a[84] >> n) | (a[85] << (23 - n))) & 0x7fffff; - r[85] = ((a[85] 
>> n) | (a[86] << (23 - n))) & 0x7fffff; - r[86] = ((a[86] >> n) | (a[87] << (23 - n))) & 0x7fffff; - r[87] = ((a[87] >> n) | (a[88] << (23 - n))) & 0x7fffff; + for (i=0; i<88; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff; + } r[88] = ((a[88] >> n) | (a[89] << (23 - n))) & 0x7fffff; #endif r[89] = a[89] >> n; @@ -3057,7 +2963,7 @@ static int sp_2048_mod_exp_90(sp_digit* r, sp_digit* a, sp_digit* e, int bits, #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */ #if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) + !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* AND m into each word of a and store in r. * * r A single precision integer. @@ -3293,6 +3199,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, #endif /* WOLFSSL_SP_SMALL */ } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -3527,6 +3434,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ } +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #ifdef WOLFSSL_HAVE_SP_DH /* Convert an array of sp_digit to an mp_int. 
@@ -4437,7 +4345,8 @@ SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) * @@ -4542,7 +4451,7 @@ SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -4564,7 +4473,56 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) *rho = (1L << 23) - x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. + */ +SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int64_t tb = b; + int64_t t = 0; + int i; + + for (i = 0; i < 136; i++) { + t += tb * a[i]; + r[i] = t & 0x7fffff; + t >>= 23; + } + r[136] = (sp_digit)t; +#else + int64_t tb = b; + int64_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; + for (i = 0; i < 136; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 
23) + (t[0] & 0x7fffff); + } + r[136] = (sp_digit)(t[7] >> 23); +#endif /* WOLFSSL_SP_SMALL */ +} + +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 3072 bits, just need to subtract. * @@ -5399,7 +5357,7 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, #endif } -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 3072 bits, just need to subtract. @@ -5649,6 +5607,7 @@ static void sp_3072_mont_reduce_136(sp_digit* a, sp_digit* m, sp_digit mp) int i; sp_digit mu; +#ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<135; i++) { mu = (a[i] * mp) & 0x7fffff; @@ -5671,6 +5630,17 @@ static void sp_3072_mont_reduce_136(sp_digit* a, sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 23; a[i] &= 0x7fffff; } +#else + for (i=0; i<135; i++) { + mu = (a[i] * mp) & 0x7fffff; + sp_3072_mul_add_136(a+i, m, mu); + a[i+1] += a[i] >> 23; + } + mu = (a[i] * mp) & 0x1fffl; + sp_3072_mul_add_136(a+i, m, mu); + a[i+1] += a[i] >> 23; + a[i] &= 0x7fffff; +#endif sp_3072_mont_shift_136(a, a); sp_3072_cond_sub_136(a, a, m, 0 - ((a[135] >> 13) > 0)); @@ -5707,54 +5677,6 @@ static void sp_3072_mont_sqr_136(sp_digit* r, sp_digit* a, sp_digit* m, sp_3072_mont_reduce_136(r, m, mp); } -/* Multiply a by scalar b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A scalar. 
- */ -SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ -#ifdef WOLFSSL_SP_SMALL - int64_t tb = b; - int64_t t = 0; - int i; - - for (i = 0; i < 136; i++) { - t += tb * a[i]; - r[i] = t & 0x7fffff; - t >>= 23; - } - r[136] = (sp_digit)t; -#else - int64_t tb = b; - int64_t t[8]; - int i; - - t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; - for (i = 0; i < 136; i += 8) { - t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); - t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); - t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); - t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); - t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); - t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); - t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); - } - r[136] = (sp_digit)(t[7] >> 23); -#endif /* WOLFSSL_SP_SMALL */ -} - /* Multiply a by scalar b into r. (r = a * b) * * r A single precision integer. 
@@ -5874,147 +5796,22 @@ SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, #endif SP_NOINLINE static void sp_3072_rshift_136(sp_digit* r, sp_digit* a, byte n) { -#ifdef WOLFSSL_SP_SMALL int i; +#ifdef WOLFSSL_SP_SMALL for (i=0; i<135; i++) r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff; #else - r[0] = ((a[0] >> n) | (a[1] << (23 - n))) & 0x7fffff; - r[1] = ((a[1] >> n) | (a[2] << (23 - n))) & 0x7fffff; - r[2] = ((a[2] >> n) | (a[3] << (23 - n))) & 0x7fffff; - r[3] = ((a[3] >> n) | (a[4] << (23 - n))) & 0x7fffff; - r[4] = ((a[4] >> n) | (a[5] << (23 - n))) & 0x7fffff; - r[5] = ((a[5] >> n) | (a[6] << (23 - n))) & 0x7fffff; - r[6] = ((a[6] >> n) | (a[7] << (23 - n))) & 0x7fffff; - r[7] = ((a[7] >> n) | (a[8] << (23 - n))) & 0x7fffff; - r[8] = ((a[8] >> n) | (a[9] << (23 - n))) & 0x7fffff; - r[9] = ((a[9] >> n) | (a[10] << (23 - n))) & 0x7fffff; - r[10] = ((a[10] >> n) | (a[11] << (23 - n))) & 0x7fffff; - r[11] = ((a[11] >> n) | (a[12] << (23 - n))) & 0x7fffff; - r[12] = ((a[12] >> n) | (a[13] << (23 - n))) & 0x7fffff; - r[13] = ((a[13] >> n) | (a[14] << (23 - n))) & 0x7fffff; - r[14] = ((a[14] >> n) | (a[15] << (23 - n))) & 0x7fffff; - r[15] = ((a[15] >> n) | (a[16] << (23 - n))) & 0x7fffff; - r[16] = ((a[16] >> n) | (a[17] << (23 - n))) & 0x7fffff; - r[17] = ((a[17] >> n) | (a[18] << (23 - n))) & 0x7fffff; - r[18] = ((a[18] >> n) | (a[19] << (23 - n))) & 0x7fffff; - r[19] = ((a[19] >> n) | (a[20] << (23 - n))) & 0x7fffff; - r[20] = ((a[20] >> n) | (a[21] << (23 - n))) & 0x7fffff; - r[21] = ((a[21] >> n) | (a[22] << (23 - n))) & 0x7fffff; - r[22] = ((a[22] >> n) | (a[23] << (23 - n))) & 0x7fffff; - r[23] = ((a[23] >> n) | (a[24] << (23 - n))) & 0x7fffff; - r[24] = ((a[24] >> n) | (a[25] << (23 - n))) & 0x7fffff; - r[25] = ((a[25] >> n) | (a[26] << (23 - n))) & 0x7fffff; - r[26] = ((a[26] >> n) | (a[27] << (23 - n))) & 0x7fffff; - r[27] = ((a[27] >> n) | (a[28] << (23 - n))) & 0x7fffff; - r[28] = ((a[28] >> n) | (a[29] << (23 - n))) & 
0x7fffff; - r[29] = ((a[29] >> n) | (a[30] << (23 - n))) & 0x7fffff; - r[30] = ((a[30] >> n) | (a[31] << (23 - n))) & 0x7fffff; - r[31] = ((a[31] >> n) | (a[32] << (23 - n))) & 0x7fffff; - r[32] = ((a[32] >> n) | (a[33] << (23 - n))) & 0x7fffff; - r[33] = ((a[33] >> n) | (a[34] << (23 - n))) & 0x7fffff; - r[34] = ((a[34] >> n) | (a[35] << (23 - n))) & 0x7fffff; - r[35] = ((a[35] >> n) | (a[36] << (23 - n))) & 0x7fffff; - r[36] = ((a[36] >> n) | (a[37] << (23 - n))) & 0x7fffff; - r[37] = ((a[37] >> n) | (a[38] << (23 - n))) & 0x7fffff; - r[38] = ((a[38] >> n) | (a[39] << (23 - n))) & 0x7fffff; - r[39] = ((a[39] >> n) | (a[40] << (23 - n))) & 0x7fffff; - r[40] = ((a[40] >> n) | (a[41] << (23 - n))) & 0x7fffff; - r[41] = ((a[41] >> n) | (a[42] << (23 - n))) & 0x7fffff; - r[42] = ((a[42] >> n) | (a[43] << (23 - n))) & 0x7fffff; - r[43] = ((a[43] >> n) | (a[44] << (23 - n))) & 0x7fffff; - r[44] = ((a[44] >> n) | (a[45] << (23 - n))) & 0x7fffff; - r[45] = ((a[45] >> n) | (a[46] << (23 - n))) & 0x7fffff; - r[46] = ((a[46] >> n) | (a[47] << (23 - n))) & 0x7fffff; - r[47] = ((a[47] >> n) | (a[48] << (23 - n))) & 0x7fffff; - r[48] = ((a[48] >> n) | (a[49] << (23 - n))) & 0x7fffff; - r[49] = ((a[49] >> n) | (a[50] << (23 - n))) & 0x7fffff; - r[50] = ((a[50] >> n) | (a[51] << (23 - n))) & 0x7fffff; - r[51] = ((a[51] >> n) | (a[52] << (23 - n))) & 0x7fffff; - r[52] = ((a[52] >> n) | (a[53] << (23 - n))) & 0x7fffff; - r[53] = ((a[53] >> n) | (a[54] << (23 - n))) & 0x7fffff; - r[54] = ((a[54] >> n) | (a[55] << (23 - n))) & 0x7fffff; - r[55] = ((a[55] >> n) | (a[56] << (23 - n))) & 0x7fffff; - r[56] = ((a[56] >> n) | (a[57] << (23 - n))) & 0x7fffff; - r[57] = ((a[57] >> n) | (a[58] << (23 - n))) & 0x7fffff; - r[58] = ((a[58] >> n) | (a[59] << (23 - n))) & 0x7fffff; - r[59] = ((a[59] >> n) | (a[60] << (23 - n))) & 0x7fffff; - r[60] = ((a[60] >> n) | (a[61] << (23 - n))) & 0x7fffff; - r[61] = ((a[61] >> n) | (a[62] << (23 - n))) & 0x7fffff; - r[62] = ((a[62] >> n) | (a[63] << (23 - 
n))) & 0x7fffff; - r[63] = ((a[63] >> n) | (a[64] << (23 - n))) & 0x7fffff; - r[64] = ((a[64] >> n) | (a[65] << (23 - n))) & 0x7fffff; - r[65] = ((a[65] >> n) | (a[66] << (23 - n))) & 0x7fffff; - r[66] = ((a[66] >> n) | (a[67] << (23 - n))) & 0x7fffff; - r[67] = ((a[67] >> n) | (a[68] << (23 - n))) & 0x7fffff; - r[68] = ((a[68] >> n) | (a[69] << (23 - n))) & 0x7fffff; - r[69] = ((a[69] >> n) | (a[70] << (23 - n))) & 0x7fffff; - r[70] = ((a[70] >> n) | (a[71] << (23 - n))) & 0x7fffff; - r[71] = ((a[71] >> n) | (a[72] << (23 - n))) & 0x7fffff; - r[72] = ((a[72] >> n) | (a[73] << (23 - n))) & 0x7fffff; - r[73] = ((a[73] >> n) | (a[74] << (23 - n))) & 0x7fffff; - r[74] = ((a[74] >> n) | (a[75] << (23 - n))) & 0x7fffff; - r[75] = ((a[75] >> n) | (a[76] << (23 - n))) & 0x7fffff; - r[76] = ((a[76] >> n) | (a[77] << (23 - n))) & 0x7fffff; - r[77] = ((a[77] >> n) | (a[78] << (23 - n))) & 0x7fffff; - r[78] = ((a[78] >> n) | (a[79] << (23 - n))) & 0x7fffff; - r[79] = ((a[79] >> n) | (a[80] << (23 - n))) & 0x7fffff; - r[80] = ((a[80] >> n) | (a[81] << (23 - n))) & 0x7fffff; - r[81] = ((a[81] >> n) | (a[82] << (23 - n))) & 0x7fffff; - r[82] = ((a[82] >> n) | (a[83] << (23 - n))) & 0x7fffff; - r[83] = ((a[83] >> n) | (a[84] << (23 - n))) & 0x7fffff; - r[84] = ((a[84] >> n) | (a[85] << (23 - n))) & 0x7fffff; - r[85] = ((a[85] >> n) | (a[86] << (23 - n))) & 0x7fffff; - r[86] = ((a[86] >> n) | (a[87] << (23 - n))) & 0x7fffff; - r[87] = ((a[87] >> n) | (a[88] << (23 - n))) & 0x7fffff; - r[88] = ((a[88] >> n) | (a[89] << (23 - n))) & 0x7fffff; - r[89] = ((a[89] >> n) | (a[90] << (23 - n))) & 0x7fffff; - r[90] = ((a[90] >> n) | (a[91] << (23 - n))) & 0x7fffff; - r[91] = ((a[91] >> n) | (a[92] << (23 - n))) & 0x7fffff; - r[92] = ((a[92] >> n) | (a[93] << (23 - n))) & 0x7fffff; - r[93] = ((a[93] >> n) | (a[94] << (23 - n))) & 0x7fffff; - r[94] = ((a[94] >> n) | (a[95] << (23 - n))) & 0x7fffff; - r[95] = ((a[95] >> n) | (a[96] << (23 - n))) & 0x7fffff; - r[96] = ((a[96] >> n) | (a[97] << 
(23 - n))) & 0x7fffff; - r[97] = ((a[97] >> n) | (a[98] << (23 - n))) & 0x7fffff; - r[98] = ((a[98] >> n) | (a[99] << (23 - n))) & 0x7fffff; - r[99] = ((a[99] >> n) | (a[100] << (23 - n))) & 0x7fffff; - r[100] = ((a[100] >> n) | (a[101] << (23 - n))) & 0x7fffff; - r[101] = ((a[101] >> n) | (a[102] << (23 - n))) & 0x7fffff; - r[102] = ((a[102] >> n) | (a[103] << (23 - n))) & 0x7fffff; - r[103] = ((a[103] >> n) | (a[104] << (23 - n))) & 0x7fffff; - r[104] = ((a[104] >> n) | (a[105] << (23 - n))) & 0x7fffff; - r[105] = ((a[105] >> n) | (a[106] << (23 - n))) & 0x7fffff; - r[106] = ((a[106] >> n) | (a[107] << (23 - n))) & 0x7fffff; - r[107] = ((a[107] >> n) | (a[108] << (23 - n))) & 0x7fffff; - r[108] = ((a[108] >> n) | (a[109] << (23 - n))) & 0x7fffff; - r[109] = ((a[109] >> n) | (a[110] << (23 - n))) & 0x7fffff; - r[110] = ((a[110] >> n) | (a[111] << (23 - n))) & 0x7fffff; - r[111] = ((a[111] >> n) | (a[112] << (23 - n))) & 0x7fffff; - r[112] = ((a[112] >> n) | (a[113] << (23 - n))) & 0x7fffff; - r[113] = ((a[113] >> n) | (a[114] << (23 - n))) & 0x7fffff; - r[114] = ((a[114] >> n) | (a[115] << (23 - n))) & 0x7fffff; - r[115] = ((a[115] >> n) | (a[116] << (23 - n))) & 0x7fffff; - r[116] = ((a[116] >> n) | (a[117] << (23 - n))) & 0x7fffff; - r[117] = ((a[117] >> n) | (a[118] << (23 - n))) & 0x7fffff; - r[118] = ((a[118] >> n) | (a[119] << (23 - n))) & 0x7fffff; - r[119] = ((a[119] >> n) | (a[120] << (23 - n))) & 0x7fffff; - r[120] = ((a[120] >> n) | (a[121] << (23 - n))) & 0x7fffff; - r[121] = ((a[121] >> n) | (a[122] << (23 - n))) & 0x7fffff; - r[122] = ((a[122] >> n) | (a[123] << (23 - n))) & 0x7fffff; - r[123] = ((a[123] >> n) | (a[124] << (23 - n))) & 0x7fffff; - r[124] = ((a[124] >> n) | (a[125] << (23 - n))) & 0x7fffff; - r[125] = ((a[125] >> n) | (a[126] << (23 - n))) & 0x7fffff; - r[126] = ((a[126] >> n) | (a[127] << (23 - n))) & 0x7fffff; - r[127] = ((a[127] >> n) | (a[128] << (23 - n))) & 0x7fffff; - r[128] = ((a[128] >> n) | (a[129] << (23 - n))) & 0x7fffff; 
- r[129] = ((a[129] >> n) | (a[130] << (23 - n))) & 0x7fffff; - r[130] = ((a[130] >> n) | (a[131] << (23 - n))) & 0x7fffff; - r[131] = ((a[131] >> n) | (a[132] << (23 - n))) & 0x7fffff; - r[132] = ((a[132] >> n) | (a[133] << (23 - n))) & 0x7fffff; - r[133] = ((a[133] >> n) | (a[134] << (23 - n))) & 0x7fffff; - r[134] = ((a[134] >> n) | (a[135] << (23 - n))) & 0x7fffff; + for (i=0; i<128; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff; + } #endif r[135] = a[135] >> n; } @@ -6426,7 +6223,7 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */ #if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) + !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* AND m into each word of a and store in r. * * r A single precision integer. @@ -6661,6 +6458,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, #endif /* WOLFSSL_SP_SMALL */ } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -6895,6 +6693,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ } +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #ifdef WOLFSSL_HAVE_SP_DH /* Convert an array of sp_digit to an mp_int. 
diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index 6cab32015..0eef655c1 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -50,7 +50,7 @@ #ifndef WOLFSSL_SP_ASM #if SP_WORD_SIZE == 64 -#if defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL) +#if (defined(WOLFSSL_SP_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* Mask for address to obfuscate which of the two address will be used. */ static const size_t addr_mask[2] = { 0, (size_t)-1 }; #endif @@ -701,7 +701,8 @@ SP_NOINLINE static void sp_2048_sqr_36(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) * @@ -806,7 +807,7 @@ SP_NOINLINE static void sp_2048_sqr_18(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -829,7 +830,62 @@ static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho) *rho = (1L << 57) - x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 36; i++) { + t += tb * a[i]; + r[i] = t & 0x1ffffffffffffffl; + t >>= 57; + } + r[36] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl; + for (i = 0; i < 32; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl); + } + t[1] = tb * a[33]; + r[33] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl); + t[2] = tb * a[34]; + r[34] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl); + t[3] = tb * a[35]; + r[35] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl); + r[36] = (sp_digit)(t[3] >> 57); +#endif /* WOLFSSL_SP_SMALL */ +} + +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 2048 bits, just need to subtract. * @@ -1631,7 +1687,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, sp_digit* a, sp_digit* e, int bits, #endif } -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* r = 2^n mod m where n is the number of bits to reduce by. 
* Given m must be 2048 bits, just need to subtract. @@ -1897,6 +1953,7 @@ static void sp_2048_mont_reduce_36(sp_digit* a, sp_digit* m, sp_digit mp) int i; sp_digit mu; +#ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<35; i++) { mu = (a[i] * mp) & 0x1ffffffffffffffl; @@ -1919,6 +1976,17 @@ static void sp_2048_mont_reduce_36(sp_digit* a, sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffl; } +#else + for (i=0; i<35; i++) { + mu = (a[i] * mp) & 0x1ffffffffffffffl; + sp_2048_mul_add_36(a+i, m, mu); + a[i+1] += a[i] >> 57; + } + mu = (a[i] * mp) & 0x1fffffffffffffl; + sp_2048_mul_add_36(a+i, m, mu); + a[i+1] += a[i] >> 57; + a[i] &= 0x1ffffffffffffffl; +#endif sp_2048_mont_shift_36(a, a); sp_2048_cond_sub_36(a, a, m, 0 - ((a[35] >> 53) > 0)); @@ -1955,60 +2023,6 @@ static void sp_2048_mont_sqr_36(sp_digit* r, sp_digit* a, sp_digit* m, sp_2048_mont_reduce_36(r, m, mp); } -/* Multiply a by scalar b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A scalar. 
- */ -SP_NOINLINE static void sp_2048_mul_d_36(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ -#ifdef WOLFSSL_SP_SMALL - int128_t tb = b; - int128_t t = 0; - int i; - - for (i = 0; i < 36; i++) { - t += tb * a[i]; - r[i] = t & 0x1ffffffffffffffl; - t >>= 57; - } - r[36] = (sp_digit)t; -#else - int128_t tb = b; - int128_t t[8]; - int i; - - t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl; - for (i = 0; i < 32; i += 8) { - t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl); - t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl); - t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl); - t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl); - t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl); - t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl); - t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl); - t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl); - } - t[1] = tb * a[33]; - r[33] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl); - t[2] = tb * a[34]; - r[34] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl); - t[3] = tb * a[35]; - r[35] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl); - r[36] = (sp_digit)(t[3] >> 57); -#endif /* WOLFSSL_SP_SMALL */ -} - /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -2483,7 +2497,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, sp_digit* a, sp_digit* e, int bits, #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */ #if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) + !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* AND m into each word of a and store in r. * * r A single precision integer. 
@@ -2716,6 +2730,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, #endif /* WOLFSSL_SP_SMALL */ } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -2950,6 +2965,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ } +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #ifdef WOLFSSL_HAVE_SP_DH /* Convert an array of sp_digit to an mp_int. @@ -4011,7 +4027,8 @@ SP_NOINLINE static void sp_3072_sqr_54(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) * @@ -4225,7 +4242,7 @@ SP_NOINLINE static void sp_3072_sqr_27(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -4248,7 +4265,66 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) *rho = (1L << 57) - x; } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) +/* Multiply a by scalar b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A scalar. 
+ */ +SP_NOINLINE static void sp_3072_mul_d_54(sp_digit* r, const sp_digit* a, + const sp_digit b) +{ +#ifdef WOLFSSL_SP_SMALL + int128_t tb = b; + int128_t t = 0; + int i; + + for (i = 0; i < 54; i++) { + t += tb * a[i]; + r[i] = t & 0x1ffffffffffffffl; + t >>= 57; + } + r[54] = (sp_digit)t; +#else + int128_t tb = b; + int128_t t[8]; + int i; + + t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl; + for (i = 0; i < 48; i += 8) { + t[1] = tb * a[i+1]; + r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl); + t[2] = tb * a[i+2]; + r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl); + t[3] = tb * a[i+3]; + r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl); + t[4] = tb * a[i+4]; + r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl); + t[5] = tb * a[i+5]; + r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl); + t[6] = tb * a[i+6]; + r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl); + t[7] = tb * a[i+7]; + r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl); + t[0] = tb * a[i+8]; + r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl); + } + t[1] = tb * a[49]; + r[49] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl); + t[2] = tb * a[50]; + r[50] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl); + t[3] = tb * a[51]; + r[51] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl); + t[4] = tb * a[52]; + r[52] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl); + t[5] = tb * a[53]; + r[53] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl); + r[54] = (sp_digit)(t[5] >> 57); +#endif /* WOLFSSL_SP_SMALL */ +} + +#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 3072 bits, just need to subtract. 
* @@ -5035,7 +5111,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, sp_digit* a, sp_digit* e, int bits, #endif } -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */ +#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 3072 bits, just need to subtract. @@ -5308,6 +5384,7 @@ static void sp_3072_mont_reduce_54(sp_digit* a, sp_digit* m, sp_digit mp) int i; sp_digit mu; +#ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<53; i++) { mu = (a[i] * mp) & 0x1ffffffffffffffl; @@ -5330,6 +5407,17 @@ static void sp_3072_mont_reduce_54(sp_digit* a, sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffl; } +#else + for (i=0; i<53; i++) { + mu = (a[i] * mp) & 0x1ffffffffffffffl; + sp_3072_mul_add_54(a+i, m, mu); + a[i+1] += a[i] >> 57; + } + mu = (a[i] * mp) & 0x7ffffffffffffl; + sp_3072_mul_add_54(a+i, m, mu); + a[i+1] += a[i] >> 57; + a[i] &= 0x1ffffffffffffffl; +#endif sp_3072_mont_shift_54(a, a); sp_3072_cond_sub_54(a, a, m, 0 - ((a[53] >> 51) > 0)); @@ -5366,64 +5454,6 @@ static void sp_3072_mont_sqr_54(sp_digit* r, sp_digit* a, sp_digit* m, sp_3072_mont_reduce_54(r, m, mp); } -/* Multiply a by scalar b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A scalar. 
- */ -SP_NOINLINE static void sp_3072_mul_d_54(sp_digit* r, const sp_digit* a, - const sp_digit b) -{ -#ifdef WOLFSSL_SP_SMALL - int128_t tb = b; - int128_t t = 0; - int i; - - for (i = 0; i < 54; i++) { - t += tb * a[i]; - r[i] = t & 0x1ffffffffffffffl; - t >>= 57; - } - r[54] = (sp_digit)t; -#else - int128_t tb = b; - int128_t t[8]; - int i; - - t[0] = tb * a[0]; r[0] = t[0] & 0x1ffffffffffffffl; - for (i = 0; i < 48; i += 8) { - t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl); - t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl); - t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl); - t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl); - t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl); - t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 57) + (t[6] & 0x1ffffffffffffffl); - t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 57) + (t[7] & 0x1ffffffffffffffl); - t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 57) + (t[0] & 0x1ffffffffffffffl); - } - t[1] = tb * a[49]; - r[49] = (sp_digit)(t[0] >> 57) + (t[1] & 0x1ffffffffffffffl); - t[2] = tb * a[50]; - r[50] = (sp_digit)(t[1] >> 57) + (t[2] & 0x1ffffffffffffffl); - t[3] = tb * a[51]; - r[51] = (sp_digit)(t[2] >> 57) + (t[3] & 0x1ffffffffffffffl); - t[4] = tb * a[52]; - r[52] = (sp_digit)(t[3] >> 57) + (t[4] & 0x1ffffffffffffffl); - t[5] = tb * a[53]; - r[53] = (sp_digit)(t[4] >> 57) + (t[5] & 0x1ffffffffffffffl); - r[54] = (sp_digit)(t[5] >> 57); -#endif /* WOLFSSL_SP_SMALL */ -} - /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. 
* @@ -5863,7 +5893,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, sp_digit* a, sp_digit* e, int bits, #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */ #if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) + !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* AND m into each word of a and store in r. * * r A single precision integer. @@ -6097,6 +6127,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, #endif /* WOLFSSL_SP_SMALL */ } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -6331,6 +6362,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */ } +#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #ifdef WOLFSSL_HAVE_SP_DH /* Convert an array of sp_digit to an mp_int. diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 94bf98dda..ad62d5dc4 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -52,6 +52,7 @@ int sp_init(sp_int* a) return MP_OKAY; } +#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && (!defined(NO_DH) || defined(HAVE_ECC)) /* Initialize up to six big numbers to be zero. * * a SP integer. @@ -92,6 +93,7 @@ int sp_init_multi(sp_int* a, sp_int* b, sp_int* c, sp_int* d, sp_int* e, return MP_OKAY; } +#endif /* Clear the data from the big number and set to zero. * @@ -158,6 +160,7 @@ int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz) return MP_OKAY; } +#ifdef HAVE_ECC /* Convert a number as string in big-endian format to a big number. * Only supports base-16 (hexadecimal). * Negative values not supported. @@ -210,6 +213,7 @@ int sp_read_radix(sp_int* a, const char* in, int radix) return MP_OKAY; } +#endif /* Compare two big numbers. 
* @@ -284,6 +288,7 @@ int sp_leading_bit(sp_int* a) return bit; } +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && (!defined(NO_DH) || defined(HAVE_ECC)) /* Convert the big number to an array of bytes in big-endian format. * The array must be large enough for encoded number - use mp_unsigned_bin_size * to calculate the number of bytes required. @@ -307,6 +312,7 @@ int sp_to_unsigned_bin(sp_int* a, byte* out) return MP_OKAY; } +#endif /* Convert the big number to an array of bytes in big-endian format. * The array must be large enough for encoded number - use mp_unsigned_bin_size @@ -333,6 +339,8 @@ int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz) return MP_OKAY; } + +#if !defined(WOLFSSL_RSA_PUBLIC_ONLY) && (!defined(NO_DH) || defined(HAVE_ECC)) /* Ensure the data in the big number is zeroed. * * a SP integer. @@ -357,6 +365,7 @@ int sp_copy(sp_int* a, sp_int* b) } return MP_OKAY; } +#endif /* Set the big number to be the value of the digit. * @@ -371,6 +380,7 @@ int sp_set(sp_int* a, sp_int_digit d) return MP_OKAY; } +#if !defined(NO_DH) || defined(HAVE_ECC) /* Checks whether the value of the big number is zero. * * a SP integer. @@ -380,7 +390,9 @@ int sp_iszero(sp_int* a) { return a->used == 0; } +#endif +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && (!defined(NO_DH) || defined(HAVE_ECC)) /* Recalculate the number of digits used. * * a SP integer. @@ -436,6 +448,7 @@ int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r) return MP_OKAY; } +#endif /* Compare a one digit number with a big number. * @@ -464,6 +477,7 @@ int sp_cmp_d(sp_int *a, sp_int_digit d) return MP_EQ; } +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && (!defined(NO_DH) || defined(HAVE_ECC)) /* Left shift the number by number of bits. * Bits may be larger than the word size. * @@ -561,8 +575,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) return MP_OKAY; } +#endif -#if defined(USE_FAST_MATH) || !defined(NO_BIG_INT) /* Clear all data in the big number and sets value to zero. * * a SP integer. 
@@ -604,6 +618,7 @@ int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r) return MP_OKAY; } +#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && (!defined(NO_DH) || defined(HAVE_ECC)) /* Left shift the big number by a number of digits. * WIll chop off digits overflowing maximum size. * @@ -622,7 +637,6 @@ int sp_lshd(sp_int* a, int s) return MP_OKAY; } -#endif #ifndef NO_PWDBASED /* Add two large numbers into result: r = a + b @@ -659,6 +673,7 @@ int sp_add(sp_int* a, sp_int* b, sp_int* r) return MP_OKAY; } +#endif /* NO_PWDBASED */ #endif #ifndef NO_RSA diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index f86ce1fe5..662e31690 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -8967,6 +8967,7 @@ int certext_test(void) } #endif /* WOLFSSL_CERT_EXT && WOLFSSL_TEST_CERT */ +#ifndef NO_ASN static int rsa_flatten_test(RsaKey* key) { int ret; @@ -9058,8 +9059,9 @@ static int rsa_flatten_test(RsaKey* key) return 0; } +#endif /* NO_ASN */ -#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) +#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(NO_ASN) static int rsa_export_key_test(RsaKey* key) { int ret; @@ -9131,7 +9133,7 @@ static int rsa_export_key_test(RsaKey* key) return 0; } -#endif /* !HAVE_FIPS */ +#endif /* !HAVE_FIPS && !USER_RSA && !NO_ASN */ #ifndef NO_SIG_WRAPPER static int rsa_sig_test(RsaKey* key, word32 keyLen, int modLen, WC_RNG* rng) @@ -9390,7 +9392,7 @@ static int rsa_nb_test(RsaKey* key, const byte* in, word32 inLen, byte* out, } #endif -#ifndef HAVE_USER_RSA +#if !defined(HAVE_USER_RSA) && !defined(NO_ASN) static int rsa_decode_test(RsaKey* keyPub) { int ret; @@ -10764,7 +10766,7 @@ int rsa_test(void) XMEMSET(&caKey, 0, sizeof(caKey)); #endif -#ifndef HAVE_USER_RSA +#if !defined(HAVE_USER_RSA) && !defined(NO_ASN) ret = rsa_decode_test(&key); if (ret != 0) return ret; @@ -10814,11 +10816,27 @@ int rsa_test(void) if (ret != 0) { ERROR_OUT(-7003, exit_rsa); } +#ifndef NO_ASN ret = wc_RsaPrivateKeyDecode(tmp, &idx, &key, 
(word32)bytes); if (ret != 0) { ERROR_OUT(-7004, exit_rsa); } +#elif defined(WOLFSSL_RSA_VERIFY_ONLY) + #ifdef USE_CERT_BUFFERS_2048 + ret = mp_read_unsigned_bin(&key.n, &tmp[12], 256); + if (ret != 0) { + ERROR_OUT(-7004, exit_rsa); + } + ret = mp_set_int(&key.e, WC_RSA_EXPONENT); + if (ret != 0) { + ERROR_OUT(-7004, exit_rsa); + } + #else + #error Not supported yet! + #endif +#endif +#ifndef WC_NO_RNG #ifndef HAVE_FIPS ret = wc_InitRng_ex(&rng, HEAP_HINT, devId); #else @@ -10827,6 +10845,7 @@ int rsa_test(void) if (ret != 0) { ERROR_OUT(-7005, exit_rsa); } +#endif #ifndef NO_SIG_WRAPPER ret = rsa_sig_test(&key, sizeof(RsaKey), wc_RsaEncryptSize(&key), &rng); @@ -10840,6 +10859,7 @@ int rsa_test(void) goto exit_rsa; #endif +#ifndef WOLFSSL_RSA_VERIFY_ONLY do { #if defined(WOLFSSL_ASYNC_CRYPT) ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN); @@ -10908,6 +10928,49 @@ int rsa_test(void) if (ret < 0) { ERROR_OUT(-7013, exit_rsa); } +#else + (void)outSz; + (void)inLen; + (void)res; + { + byte signature_2048[] = { + 0x07, 0x6f, 0xc9, 0x85, 0x73, 0x9e, 0x21, 0x79, + 0x47, 0xf1, 0xa3, 0xd7, 0xf4, 0x27, 0x29, 0xbe, + 0x99, 0x5d, 0xac, 0xb2, 0x10, 0x3f, 0x95, 0xda, + 0x89, 0x23, 0xb8, 0x96, 0x13, 0x57, 0x72, 0x30, + 0xa1, 0xfe, 0x5a, 0x68, 0x9c, 0x99, 0x9d, 0x1e, + 0x05, 0xa4, 0x80, 0xb0, 0xbb, 0xd9, 0xd9, 0xa1, + 0x69, 0x97, 0x74, 0xb3, 0x41, 0x21, 0x3b, 0x47, + 0xf5, 0x51, 0xb1, 0xfb, 0xc7, 0xaa, 0xcc, 0xdc, + 0xcd, 0x76, 0xa0, 0x28, 0x4d, 0x27, 0x14, 0xa4, + 0xb9, 0x41, 0x68, 0x7c, 0xb3, 0x66, 0xe6, 0x6f, + 0x40, 0x76, 0xe4, 0x12, 0xfd, 0xae, 0x29, 0xb5, + 0x63, 0x60, 0x87, 0xce, 0x49, 0x6b, 0xf3, 0x05, + 0x9a, 0x14, 0xb5, 0xcc, 0xcd, 0xf7, 0x30, 0x95, + 0xd2, 0x72, 0x52, 0x1d, 0x5b, 0x7e, 0xef, 0x4a, + 0x02, 0x96, 0x21, 0x6c, 0x55, 0xa5, 0x15, 0xb1, + 0x57, 0x63, 0x2c, 0xa3, 0x8e, 0x9d, 0x3d, 0x45, + 0xcc, 0xb8, 0xe6, 0xa1, 0xc8, 0x59, 0xcd, 0xf5, + 0xdc, 0x0a, 0x51, 0xb6, 0x9d, 0xfb, 0xf4, 0x6b, + 0xfd, 0x32, 0x71, 0x6e, 0xcf, 0xcb, 0xb3, 0xd9, + 0xe0, 
0x4a, 0x77, 0x34, 0xd6, 0x61, 0xf5, 0x7c, + 0xf9, 0xa9, 0xa4, 0xb0, 0x8e, 0x3b, 0xd6, 0x04, + 0xe0, 0xde, 0x2b, 0x5b, 0x5a, 0xbf, 0xd9, 0xef, + 0x8d, 0xa3, 0xf5, 0xb1, 0x67, 0xf3, 0xb9, 0x72, + 0x0a, 0x37, 0x12, 0x35, 0x6c, 0x8e, 0x10, 0x8b, + 0x38, 0x06, 0x16, 0x4b, 0x20, 0x20, 0x13, 0x00, + 0x2e, 0x6d, 0xc2, 0x59, 0x23, 0x67, 0x4a, 0x6d, + 0xa1, 0x46, 0x8b, 0xee, 0xcf, 0x44, 0xb4, 0x3e, + 0x56, 0x75, 0x00, 0x68, 0xb5, 0x7d, 0x0f, 0x20, + 0x79, 0x5d, 0x7f, 0x12, 0x15, 0x32, 0x89, 0x61, + 0x6b, 0x29, 0xb7, 0x52, 0xf5, 0x25, 0xd8, 0x98, + 0xe8, 0x6f, 0xf9, 0x22, 0xb4, 0xbb, 0xe5, 0xff, + 0xd0, 0x92, 0x86, 0x9a, 0x88, 0xa2, 0xaf, 0x6b + }; + ret = sizeof(signature_2048); + XMEMCPY(out, signature_2048, ret); + } +#endif idx = (word32)ret; XMEMSET(plain, 0, plainSz); @@ -10916,7 +10979,15 @@ int rsa_test(void) ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN); #endif if (ret >= 0) { +#ifndef WOLFSSL_RSA_VERIFY_INLINE ret = wc_RsaSSL_Verify(out, idx, plain, plainSz, &key); +#else + byte* dec = NULL; + ret = wc_RsaSSL_VerifyInline(out, idx, &dec, &key); + if (ret > 0) { + XMEMCPY(plain, dec, ret); + } +#endif } } while (ret == WC_PENDING_E); if (ret < 0) { @@ -11197,15 +11268,17 @@ int rsa_test(void) #endif /* !HAVE_FAST_RSA && !HAVE_FIPS */ #endif /* WC_NO_RSA_OAEP */ -#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) +#if !defined(HAVE_FIPS) && !defined(HAVE_USER_RSA) && !defined(NO_ASN) ret = rsa_export_key_test(&key); if (ret != 0) return ret; #endif +#ifndef NO_ASN ret = rsa_flatten_test(&key); if (ret != 0) return ret; +#endif #if defined(WOLFSSL_MDK_ARM) #define sizeof(s) XSTRLEN((char *)(s)) diff --git a/wolfssl/wolfcrypt/rsa.h b/wolfssl/wolfcrypt/rsa.h index ae3af7ec2..8c4f36f56 100644 --- a/wolfssl/wolfcrypt/rsa.h +++ b/wolfssl/wolfcrypt/rsa.h @@ -137,9 +137,12 @@ typedef struct RsaNb { /* RSA */ struct RsaKey { - mp_int n, e, d, p, q; + mp_int n, e; +#ifndef WOLFSSL_RSA_PUBLIC_ONLY + mp_int d, p, q; #if defined(WOLFSSL_KEY_GEN) || 
defined(OPENSSL_EXTRA) || !defined(RSA_LOW_MEM) mp_int dP, dQ, u; +#endif #endif void* heap; /* for user memory overrides */ byte* data; /* temp buffer for async RSA */ @@ -167,7 +170,9 @@ struct RsaKey { byte id[RSA_MAX_ID_LEN]; int idLen; #endif +#if defined(WOLFSSL_ASYNC_CRYPT) || !defined(WOLFSSL_RSA_VERIFY_INLINE) byte dataIsAlloc; +#endif #ifdef WC_RSA_NONBLOCK RsaNb* nb; #endif diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index 8ad27739f..8f79d204b 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -1819,6 +1819,43 @@ extern void uITRON4_free(void *p) ; #define WOLFSSL_NO_HASH_RAW #endif +#if !defined(WOLFSSL_SHA384) && !defined(WOLFSSL_SHA512) && defined(NO_AES) && \ + !defined(WOLFSSL_SHA3) + #undef WOLFSSL_NO_WORD64_OPS + #define WOLFSSL_NO_WORD64_OPS +#endif + +#if defined(NO_AES) && defined(NO_DES3) && !defined(HAVE_CAMELLIA) && \ + defined(NO_PWDBASED) && !defined(HAVE_IDEA) + #undef WOLFSSL_NO_XOR_OPS + #define WOLFSSL_NO_XOR_OPS +#endif + +#if defined(NO_ASN) && defined(WOLFCRYPT_ONLY) + #undef WOLFSSL_NO_INT_ENCODE + #define WOLFSSL_NO_INT_ENCODE + #undef WOLFSSL_NO_INT_DECODE + #define WOLFSSL_NO_INT_DECODE +#endif + +#if defined(WOLFCRYPT_ONLY) && defined(WOLFSSL_RSA_VERIFY_ONLY) + #undef WOLFSSL_NO_CT_OPS + #define WOLFSSL_NO_CT_OPS +#endif + +#if defined(WOLFCRYPT_ONLY) && defined(NO_AES) && !defined(HAVE_CURVE25519) && \ + defined(WC_NO_RNG) && defined(WC_NO_RSA_OAEP) + #undef WOLFSSL_NO_CONST_CMP + #define WOLFSSL_NO_CONST_CMP +#endif + +#if defined(WOLFCRYPT_ONLY) && defined(NO_AES) && !defined(WOLFSSL_SHA384) && \ + !defined(WOLFSSL_SHA512) && defined(WC_NO_RNG) && \ + defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_RSA_PUBLIC_ONLY) + #undef WOLFSSL_NO_FORCE_ZERO + #define WOLFSSL_NO_FORCE_ZERO +#endif + #ifdef __cplusplus } /* extern "C" */ #endif From fb41510f122510c0c18ee4cfa8ce040b47916a25 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Mon, 10 Dec 2018 11:52:10 +1000 Subject: 
[PATCH 2/2] Fixup for compiling verify only inline for PPC using GCC --- wolfcrypt/benchmark/benchmark.c | 157 ++- wolfcrypt/src/rsa.c | 4 + wolfcrypt/src/sp_c32.c | 1881 +++++++++++++++---------------- wolfcrypt/src/tfm.c | 8 +- 4 files changed, 1071 insertions(+), 979 deletions(-) diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index 6fd063d4a..a11f9d0c0 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -458,8 +458,7 @@ static const char* bench_result_words1[][4] = { #endif }; -#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) ||\ - defined(WOLFSSL_KEY_GEN) || defined(HAVE_NTRU) || \ +#if !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_NTRU) || \ defined(HAVE_ECC) || !defined(NO_DH) || defined(HAVE_ECC_ENCRYPT) || \ defined(HAVE_CURVE25519) || defined(HAVE_CURVE25519_SHARED_SECRET) || \ defined(HAVE_ED25519) @@ -578,9 +577,7 @@ static const char* bench_desc_words[][9] = { #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519) || defined(HAVE_ECC) || \ defined(HAVE_ECC) || defined(HAVE_NTRU) || !defined(NO_DH) || \ - (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \ - defined(HAVE_SCRYPT) - + !defined(NO_RSA) || defined(HAVE_SCRYPT) #define BENCH_ASYM #endif @@ -3862,43 +3859,136 @@ void bench_rsaKeyGen_size(int doAsync, int keySz) #define RSA_BUF_SIZE 384 /* for up to 3072 bit */ +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#elif defined(USE_CERT_BUFFERS_2048) +static unsigned char rsa_2048_sig[] = { + 0x8c, 0x9e, 0x37, 0xbf, 0xc3, 0xa6, 0xba, 0x1c, + 0x53, 0x22, 0x40, 0x4b, 0x8b, 0x0d, 0x3c, 0x0e, + 0x2e, 0x8c, 0x31, 0x2c, 0x47, 0xbf, 0x03, 0x48, + 0x18, 0x46, 0x73, 0x8d, 0xd7, 0xdd, 0x17, 0x64, + 0x0d, 0x7f, 0xdc, 0x74, 0xed, 0x80, 0xc3, 0xe8, + 0x9a, 0x18, 0x33, 0xd4, 0xe6, 0xc5, 0xe1, 0x54, + 0x75, 0xd1, 0xbb, 0x40, 0xde, 0xa8, 0xb9, 0x1b, + 0x14, 0xe8, 0xc1, 0x39, 0xeb, 0xa0, 0x69, 0x8a, + 0xc6, 0x9b, 0xef, 0x53, 0xb5, 0x23, 0x2b, 0x78, + 0x06, 0x43, 0x37, 0x11, 0x81, 
0x84, 0x73, 0x33, + 0x33, 0xfe, 0xf7, 0x5d, 0x2b, 0x84, 0xd6, 0x83, + 0xd6, 0xdd, 0x55, 0x33, 0xef, 0xd1, 0xf7, 0x12, + 0xb0, 0xc2, 0x0e, 0xb1, 0x78, 0xd4, 0xa8, 0xa3, + 0x25, 0xeb, 0xed, 0x9a, 0xb3, 0xee, 0xc3, 0x7e, + 0xce, 0x13, 0x18, 0x86, 0x31, 0xe1, 0xef, 0x01, + 0x0f, 0x6e, 0x67, 0x24, 0x74, 0xbd, 0x0b, 0x7f, + 0xa9, 0xca, 0x6f, 0xaa, 0x83, 0x28, 0x90, 0x40, + 0xf1, 0xb5, 0x10, 0x0e, 0x26, 0x03, 0x05, 0x5d, + 0x87, 0xb4, 0xe0, 0x4c, 0x98, 0xd8, 0xc6, 0x42, + 0x89, 0x77, 0xeb, 0xb6, 0xd4, 0xe6, 0x26, 0xf3, + 0x31, 0x25, 0xde, 0x28, 0x38, 0x58, 0xe8, 0x2c, + 0xf4, 0x56, 0x7c, 0xb6, 0xfd, 0x99, 0xb0, 0xb0, + 0xf4, 0x83, 0xb6, 0x74, 0xa9, 0x5b, 0x9f, 0xe8, + 0xe9, 0xf1, 0xa1, 0x2a, 0xbd, 0xf6, 0x83, 0x28, + 0x09, 0xda, 0xa6, 0xd6, 0xcd, 0x61, 0x60, 0xf7, + 0x13, 0x4e, 0x46, 0x57, 0x38, 0x1e, 0x11, 0x92, + 0x6b, 0x6b, 0xcf, 0xd3, 0xf4, 0x8b, 0x66, 0x03, + 0x25, 0xa3, 0x7a, 0x2f, 0xce, 0xc1, 0x85, 0xa5, + 0x48, 0x91, 0x8a, 0xb3, 0x4f, 0x5d, 0x98, 0xb1, + 0x69, 0x58, 0x47, 0x69, 0x0c, 0x52, 0xdc, 0x42, + 0x4c, 0xef, 0xe8, 0xd4, 0x4d, 0x6a, 0x33, 0x7d, + 0x9e, 0xd2, 0x51, 0xe6, 0x41, 0xbf, 0x4f, 0xa2 +}; +#elif defined(USE_CERT_BUFFERS_3072) +static unsigned char rsa_3072_sig[] = { + 0x1a, 0xd6, 0x0d, 0xfd, 0xe3, 0x41, 0x95, 0x76, + 0x27, 0x16, 0x7d, 0xc7, 0x94, 0x16, 0xca, 0xa8, + 0x26, 0x08, 0xbe, 0x78, 0x87, 0x72, 0x4c, 0xd9, + 0xa7, 0xfc, 0x33, 0x77, 0x2d, 0x53, 0x07, 0xb5, + 0x8c, 0xce, 0x48, 0x17, 0x9b, 0xff, 0x9f, 0x9b, + 0x17, 0xc4, 0xbb, 0x72, 0xed, 0xdb, 0xa0, 0x34, + 0x69, 0x5b, 0xc7, 0x4e, 0xbf, 0xec, 0x13, 0xc5, + 0x98, 0x71, 0x9a, 0x4e, 0x18, 0x0e, 0xcb, 0xe7, + 0xc6, 0xd5, 0x21, 0x31, 0x7c, 0x0d, 0xae, 0x14, + 0x2b, 0x87, 0x4f, 0x77, 0x95, 0x2e, 0x26, 0xe2, + 0x83, 0xfe, 0x49, 0x1e, 0x87, 0x19, 0x4a, 0x63, + 0x73, 0x75, 0xf1, 0xf5, 0x71, 0xd2, 0xce, 0xd4, + 0x39, 0x2b, 0xd9, 0xe0, 0x76, 0x70, 0xc8, 0xf8, + 0xed, 0xdf, 0x90, 0x57, 0x17, 0xb9, 0x16, 0xf6, + 0xe9, 0x49, 0x48, 0xce, 0x5a, 0x8b, 0xe4, 0x84, + 0x7c, 0xf3, 0x31, 0x68, 0x97, 0x45, 0x68, 0x38, 
+ 0x50, 0x3a, 0x70, 0xbd, 0xb3, 0xd3, 0xd2, 0xe0, + 0x56, 0x5b, 0xc2, 0x0c, 0x2c, 0x10, 0x70, 0x7b, + 0xd4, 0x99, 0xf9, 0x38, 0x31, 0xb1, 0x86, 0xa0, + 0x07, 0xf1, 0xf6, 0x53, 0xb0, 0x44, 0x82, 0x40, + 0xd2, 0xab, 0x0e, 0x71, 0x5d, 0xe1, 0xea, 0x3a, + 0x77, 0xc9, 0xef, 0xfe, 0x54, 0x65, 0xa3, 0x49, + 0xfd, 0xa5, 0x33, 0xaa, 0x16, 0x1a, 0x38, 0xe7, + 0xaa, 0xb7, 0x13, 0xb2, 0x3b, 0xc7, 0x00, 0x87, + 0x12, 0xfe, 0xfd, 0xf4, 0x55, 0x6d, 0x1d, 0x4a, + 0x0e, 0xad, 0xd0, 0x4c, 0x55, 0x91, 0x60, 0xd9, + 0xef, 0x74, 0x69, 0x22, 0x8c, 0x51, 0x65, 0xc2, + 0x04, 0xac, 0xd3, 0x8d, 0xf7, 0x35, 0x29, 0x13, + 0x6d, 0x61, 0x7c, 0x39, 0x2f, 0x41, 0x4c, 0xdf, + 0x38, 0xfd, 0x1a, 0x7d, 0x42, 0xa7, 0x6f, 0x3f, + 0x3d, 0x9b, 0xd1, 0x97, 0xab, 0xc0, 0xa7, 0x28, + 0x1c, 0xc0, 0x02, 0x26, 0xeb, 0xce, 0xf9, 0xe1, + 0x34, 0x45, 0xaf, 0xbf, 0x8d, 0xb8, 0xe0, 0xff, + 0xd9, 0x6f, 0x77, 0xf3, 0xf7, 0xed, 0x6a, 0xbb, + 0x03, 0x52, 0xfb, 0x38, 0xfc, 0xea, 0x9f, 0xc9, + 0x98, 0xed, 0x21, 0x45, 0xaf, 0x43, 0x2b, 0x64, + 0x96, 0x82, 0x30, 0xe9, 0xb4, 0x36, 0x89, 0x77, + 0x07, 0x4a, 0xc6, 0x1f, 0x38, 0x7a, 0xee, 0xb6, + 0x86, 0xf6, 0x2f, 0x03, 0xec, 0xa2, 0xe5, 0x48, + 0xe5, 0x5a, 0xf5, 0x1c, 0xd2, 0xd9, 0xd8, 0x2d, + 0x9d, 0x06, 0x07, 0xc9, 0x8b, 0x5d, 0xe0, 0x0f, + 0x5e, 0x0c, 0x53, 0x27, 0xff, 0x23, 0xee, 0xca, + 0x5e, 0x4d, 0xf1, 0x95, 0x77, 0x78, 0x1f, 0xf2, + 0x44, 0x5b, 0x7d, 0x01, 0x49, 0x61, 0x6f, 0x6d, + 0xbf, 0xf5, 0x19, 0x06, 0x39, 0xe9, 0xe9, 0x29, + 0xde, 0x47, 0x5e, 0x2e, 0x1f, 0x68, 0xf4, 0x32, + 0x5e, 0xe9, 0xd0, 0xa7, 0xb4, 0x2a, 0x45, 0xdf, + 0x15, 0x7d, 0x0d, 0x5b, 0xef, 0xc6, 0x23, 0xac +}; +#else + #error Not Supported Yet! 
+#endif + static void bench_rsa_helper(int doAsync, RsaKey rsaKey[BENCH_MAX_PENDING], int rsaKeySz) { -#ifndef WOLFSSL_RSA_VERIFY_ONLY int ret = 0, i, times, count = 0, pending = 0; -#ifndef WOLFSSL_RSA_PUBLIC_ONLY word32 idx = 0; -#endif +#ifndef WOLFSSL_RSA_PUBLIC_ONLY const char* messageStr = "Everyone gets Friday off."; const int len = (int)XSTRLEN((char*)messageStr); +#endif double start = 0.0f; const char**desc = bench_desc_words[lng_index]; +#ifndef WOLFSSL_RSA_PUBLIC_ONLY DECLARE_VAR_INIT(message, byte, len, messageStr, HEAP_HINT); -#else - (void)doAsync; - (void)rsaKey; - (void)rsaKeySz; #endif -#ifndef WOLFSSL_RSA_VERIFY_ONLY #ifdef USE_CERT_BUFFERS_1024 DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 128, HEAP_HINT); - DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 128, HEAP_HINT); + #ifndef WOLFSSL_RSA_VERIFY_INLINE + DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 128, HEAP_HINT); + #else + byte* out[BENCH_MAX_PENDING]; + #endif #elif defined(USE_CERT_BUFFERS_2048) DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 256, HEAP_HINT); - #ifndef WOLFSSL_RSA_PUBLIC_ONLY + #ifndef WOLFSSL_RSA_VERIFY_INLINE DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 256, HEAP_HINT); + #else + byte* out[BENCH_MAX_PENDING]; #endif #elif defined(USE_CERT_BUFFERS_3072) DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 384, HEAP_HINT); - #ifndef WOLFSSL_RSA_PUBLIC_ONLY + #ifndef WOLFSSL_RSA_VERIFY_INLINE DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 384, HEAP_HINT); + #else + byte* out[BENCH_MAX_PENDING]; #endif #else #error "need a cert buffer size" #endif /* USE_CERT_BUFFERS */ -#endif if (!rsa_sign_verify) { #ifndef WOLFSSL_RSA_VERIFY_ONLY @@ -3995,6 +4085,7 @@ exit_rsa_sign: if (ret < 0) { goto exit; } +#endif /* capture resulting encrypt length */ idx = rsaKeySz/8; @@ -4009,8 +4100,26 @@ exit_rsa_sign: for (i = 0; i < BENCH_MAX_PENDING; i++) { if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times, ntimes, &pending)) { + #ifndef WOLFSSL_RSA_VERIFY_INLINE ret = wc_RsaSSL_Verify(enc[i], idx, 
out[i], rsaKeySz/8, &rsaKey[i]); + #elif defined(USE_CERT_BUFFERS_2048) + XMEMCPY(enc[i], rsa_2048_sig, sizeof(rsa_2048_sig)); + idx = sizeof(rsa_2048_sig); + out[i] = NULL; + ret = wc_RsaSSL_VerifyInline(enc[i], idx, &out[i], + &rsaKey[i]); + if (ret > 0) + ret = 0; + #elif defined(USE_CERT_BUFFERS_3072) + XMEMCPY(enc[i], rsa_3072_sig, sizeof(rsa_3072_sig)); + idx = sizeof(rsa_3072_sig); + out[i] = NULL; + ret = wc_RsaSSL_VerifyInline(enc[i], idx, &out[i], + &rsaKey[i]); + if (ret > 0) + ret = 0; + #endif if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times, &pending)) { @@ -4024,7 +4133,6 @@ exit_rsa_sign: exit_rsa_verify: bench_stats_asym_finish("RSA", rsaKeySz, desc[5], doAsync, count, start, ret); -#endif } FREE_ARRAY(enc, BENCH_MAX_PENDING, HEAP_HINT); @@ -4085,7 +4193,18 @@ void bench_rsa(int doAsync) } #else #ifdef USE_CERT_BUFFERS_2048 - ret = mp_read_unsigned_bin(&rsaKey[i].n, &tmp[13], 256); + ret = mp_read_unsigned_bin(&rsaKey[i].n, &tmp[12], 256); + if (ret != 0) { + printf("Setting modulus failed! %d\n", ret); + goto exit_bench_rsa; + } + ret = mp_set_int(&rsaKey[i].e, WC_RSA_EXPONENT); + if (ret != 0) { + printf("Setting public exponent failed! %d\n", ret); + goto exit_bench_rsa; + } + #elif defined(USE_CERT_BUFFERS_3072) + ret = mp_read_unsigned_bin(&rsaKey[i].n, &tmp[12], 384); if (ret != 0) { printf("Setting modulus failed! 
%d\n", ret); goto exit_bench_rsa; diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index 896d09e2c..c5c50c7e3 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -1253,7 +1253,9 @@ static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen, { int ret; word32 i; +#ifndef WOLFSSL_RSA_VERIFY_ONLY byte invalid = 0; +#endif if (output == NULL || pkcsBlockLen == 0) { return BAD_FUNC_ARG; @@ -1278,6 +1280,7 @@ static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen, *output = (byte *)(pkcsBlock + i); ret = pkcsBlockLen - i; } +#ifndef WOLFSSL_RSA_VERIFY_ONLY else { word32 j; byte pastSep = 0; @@ -1301,6 +1304,7 @@ static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen, *output = (byte *)(pkcsBlock + i); ret = ((int)~invalid) & (pkcsBlockLen - i); } +#endif return ret; } diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 37f023068..cc20b81d7 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -3753,9 +3753,9 @@ static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n) r[0] = 0; for (i = n-1; i >= 0; i--) { r[j] |= ((sp_digit)a[i]) << s; - if (s >= 15) { - r[j] &= 0x7fffff; - s = 23 - s; + if (s >= 14) { + r[j] &= 0x3fffff; + s = 22 - s; if (j + 1 >= max) break; r[++j] = a[i] >> s; @@ -3776,27 +3776,27 @@ static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n) */ static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a) { -#if DIGIT_BIT == 23 +#if DIGIT_BIT == 22 int j; XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); for (j = a->used; j < max; j++) r[j] = 0; -#elif DIGIT_BIT > 23 +#elif DIGIT_BIT > 22 int i, j = 0, s = 0; r[0] = 0; for (i = 0; i < a->used && j < max; i++) { r[j] |= a->dp[i] << s; - r[j] &= 0x7fffff; - s = 23 - s; + r[j] &= 0x3fffff; + s = 22 - s; if (j + 1 >= max) break; r[++j] = a->dp[i] >> s; - while (s + 23 <= DIGIT_BIT) { - s += 23; - r[j] &= 0x7fffff; + while (s + 22 <= DIGIT_BIT) { + s += 22; + r[j] &= 0x3fffff; if (j + 1 >= max) 
break; if (s < DIGIT_BIT) @@ -3815,11 +3815,11 @@ static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a) r[0] = 0; for (i = 0; i < a->used && j < max; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; - if (s + DIGIT_BIT >= 23) { - r[j] &= 0x7fffff; + if (s + DIGIT_BIT >= 22) { + r[j] &= 0x3fffff; if (j + 1 >= max) break; - s = 23 - s; + s = 22 - s; if (s == DIGIT_BIT) { r[++j] = 0; s = 0; @@ -3848,23 +3848,23 @@ static void sp_3072_to_bin(sp_digit* r, byte* a) { int i, j, s = 0, b; - for (i=0; i<135; i++) { - r[i+1] += r[i] >> 23; - r[i] &= 0x7fffff; + for (i=0; i<139; i++) { + r[i+1] += r[i] >> 22; + r[i] &= 0x3fffff; } j = 3072 / 8 - 1; a[j] = 0; - for (i=0; i<136 && j>=0; i++) { + for (i=0; i<140 && j>=0; i++) { b = 0; a[j--] |= r[i] << s; b += 8 - s; if (j < 0) break; - while (b < 23) { + while (b < 22) { a[j--] = r[i] >> b; b += 8; if (j < 0) break; } - s = 8 - (b - 23); + s = 8 - (b - 22); if (j >= 0) a[j] = 0; if (s != 0) @@ -3879,22 +3879,22 @@ static void sp_3072_to_bin(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_17(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_35(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i, j; - int64_t t[34]; + int64_t t[70]; XMEMSET(t, 0, sizeof(t)); - for (i=0; i<17; i++) { - for (j=0; j<17; j++) + for (i=0; i<35; i++) { + for (j=0; j<35; j++) t[i+j] += ((int64_t)a[i]) * b[j]; } - for (i=0; i<33; i++) { - r[i] = t[i] & 0x7fffff; - t[i+1] += t[i] >> 23; + for (i=0; i<69; i++) { + r[i] = t[i] & 0x3fffff; + t[i+1] += t[i] >> 22; } - r[33] = (sp_digit)t[33]; + r[69] = (sp_digit)t[69]; } /* Square a and put result in r. (r = a * a) @@ -3902,22 +3902,22 @@ SP_NOINLINE static void sp_3072_mul_17(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. 
*/ -SP_NOINLINE static void sp_3072_sqr_17(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_35(sp_digit* r, const sp_digit* a) { int i, j; - int64_t t[34]; + int64_t t[70]; XMEMSET(t, 0, sizeof(t)); - for (i=0; i<17; i++) { + for (i=0; i<35; i++) { for (j=0; j<i; j++) t[i+j] += (((int64_t)a[i]) * a[j]) * 2; t[i+i] += ((int64_t)a[i]) * a[i]; } - for (i=0; i<33; i++) { - r[i] = t[i] & 0x7fffff; - t[i+1] += t[i] >> 23; + for (i=0; i<69; i++) { + r[i] = t[i] & 0x3fffff; + t[i+1] += t[i] >> 22; } - r[33] = (sp_digit)t[33]; + r[69] = (sp_digit)t[69]; } /* Add b to a into r. (r = a + b) @@ -3926,33 +3926,7 @@ SP_NOINLINE static void sp_3072_sqr_17(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - int i; - - for (i = 0; i < 16; i += 8) { - r[i + 0] = a[i + 0] + b[i + 0]; - r[i + 1] = a[i + 1] + b[i + 1]; - r[i + 2] = a[i + 2] + b[i + 2]; - r[i + 3] = a[i + 3] + b[i + 3]; - r[i + 4] = a[i + 4] + b[i + 4]; - r[i + 5] = a[i + 5] + b[i + 5]; - r[i + 6] = a[i + 6] + b[i + 6]; - r[i + 7] = a[i + 7] + b[i + 7]; - } - r[16] = a[16] + b[16]; - - return 0; -} - -/* Add b to a into r. (r = a + b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -SP_NOINLINE static int sp_3072_add_34(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_35(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -3969,88 +3943,18 @@ SP_NOINLINE static int sp_3072_add_34(sp_digit* r, const sp_digit* a, } r[32] = a[32] + b[32]; r[33] = a[33] + b[33]; + r[34] = a[34] + b[34]; return 0; } -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. 
- */ -SP_NOINLINE static int sp_3072_sub_34(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - int i; - - for (i = 0; i < 32; i += 8) { - r[i + 0] = a[i + 0] - b[i + 0]; - r[i + 1] = a[i + 1] - b[i + 1]; - r[i + 2] = a[i + 2] - b[i + 2]; - r[i + 3] = a[i + 3] - b[i + 3]; - r[i + 4] = a[i + 4] - b[i + 4]; - r[i + 5] = a[i + 5] - b[i + 5]; - r[i + 6] = a[i + 6] - b[i + 6]; - r[i + 7] = a[i + 7] - b[i + 7]; - } - r[32] = a[32] - b[32]; - r[33] = a[33] - b[33]; - - return 0; -} - -/* Multiply a and b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -SP_NOINLINE static void sp_3072_mul_34(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit* z0 = r; - sp_digit z1[34]; - sp_digit* a1 = z1; - sp_digit b1[17]; - sp_digit* z2 = r + 34; - sp_3072_add_17(a1, a, &a[17]); - sp_3072_add_17(b1, b, &b[17]); - sp_3072_mul_17(z2, &a[17], &b[17]); - sp_3072_mul_17(z0, a, b); - sp_3072_mul_17(z1, a1, b1); - sp_3072_sub_34(z1, z1, z2); - sp_3072_sub_34(z1, z1, z0); - sp_3072_add_34(r + 17, r + 17, z1); -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_3072_sqr_34(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z1[34]; - sp_digit* a1 = z1; - sp_digit* z2 = r + 34; - sp_3072_add_17(a1, a, &a[17]); - sp_3072_sqr_17(z2, &a[17]); - sp_3072_sqr_17(z0, a); - sp_3072_sqr_17(z1, a1); - sp_3072_sub_34(z1, z1, z2); - sp_3072_sub_34(z1, z1, z0); - sp_3072_add_34(r + 17, r + 17, z1); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ -SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -4069,6 +3973,8 @@ SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, r[65] = a[65] + b[65]; r[66] = a[66] + b[66]; r[67] = a[67] + b[67]; + r[68] = a[68] + b[68]; + r[69] = a[69] + b[69]; return 0; } @@ -4079,7 +3985,7 @@ SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -4098,6 +4004,8 @@ SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, r[65] = a[65] - b[65]; r[66] = a[66] - b[66]; r[67] = a[67] - b[67]; + r[68] = a[68] - b[68]; + r[69] = a[69] - b[69]; return 0; } @@ -4108,22 +4016,22 @@ SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit* z0 = r; - sp_digit z1[68]; + sp_digit z1[70]; sp_digit* a1 = z1; - sp_digit b1[34]; - sp_digit* z2 = r + 68; - sp_3072_add_34(a1, a, &a[34]); - sp_3072_add_34(b1, b, &b[34]); - sp_3072_mul_34(z2, &a[34], &b[34]); - sp_3072_mul_34(z0, a, b); - sp_3072_mul_34(z1, a1, b1); - sp_3072_sub_68(z1, z1, z2); - sp_3072_sub_68(z1, z1, z0); - sp_3072_add_68(r + 34, r + 34, z1); + sp_digit b1[35]; + sp_digit* z2 = r + 70; + sp_3072_add_35(a1, a, &a[35]); + sp_3072_add_35(b1, b, &b[35]); + sp_3072_mul_35(z2, &a[35], &b[35]); + sp_3072_mul_35(z0, a, b); + sp_3072_mul_35(z1, a1, b1); + sp_3072_sub_70(z1, z1, z2); + sp_3072_sub_70(z1, z1, z0); + sp_3072_add_70(r + 35, r + 35, z1); } /* Square a and put result in r. 
(r = a * a) @@ -4131,19 +4039,19 @@ SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_70(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z1[68]; + sp_digit z1[70]; sp_digit* a1 = z1; - sp_digit* z2 = r + 68; - sp_3072_add_34(a1, a, &a[34]); - sp_3072_sqr_34(z2, &a[34]); - sp_3072_sqr_34(z0, a); - sp_3072_sqr_34(z1, a1); - sp_3072_sub_68(z1, z1, z2); - sp_3072_sub_68(z1, z1, z0); - sp_3072_add_68(r + 34, r + 34, z1); + sp_digit* z2 = r + 70; + sp_3072_add_35(a1, a, &a[35]); + sp_3072_sqr_35(z2, &a[35]); + sp_3072_sqr_35(z0, a); + sp_3072_sqr_35(z1, a1); + sp_3072_sub_70(z1, z1, z2); + sp_3072_sub_70(z1, z1, z0); + sp_3072_add_70(r + 35, r + 35, z1); } /* Add b to a into r. (r = a + b) @@ -4152,7 +4060,7 @@ SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -4167,6 +4075,10 @@ SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + b[i + 6]; r[i + 7] = a[i + 7] + b[i + 7]; } + r[136] = a[136] + b[136]; + r[137] = a[137] + b[137]; + r[138] = a[138] + b[138]; + r[139] = a[139] + b[139]; return 0; } @@ -4177,7 +4089,7 @@ SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -4192,6 +4104,10 @@ SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] - b[i + 6]; r[i + 7] = a[i + 7] - b[i + 7]; } + r[136] = a[136] - b[136]; + r[137] = a[137] - b[137]; + r[138] = a[138] - b[138]; + r[139] = a[139] - b[139]; return 0; } @@ -4202,22 +4118,22 @@ SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit* z0 = r; - sp_digit z1[136]; + sp_digit z1[140]; sp_digit* a1 = z1; - sp_digit b1[68]; - sp_digit* z2 = r + 136; - sp_3072_add_68(a1, a, &a[68]); - sp_3072_add_68(b1, b, &b[68]); - sp_3072_mul_68(z2, &a[68], &b[68]); - sp_3072_mul_68(z0, a, b); - sp_3072_mul_68(z1, a1, b1); - sp_3072_sub_136(z1, z1, z2); - sp_3072_sub_136(z1, z1, z0); - sp_3072_add_136(r + 68, r + 68, z1); + sp_digit b1[70]; + sp_digit* z2 = r + 140; + sp_3072_add_70(a1, a, &a[70]); + sp_3072_add_70(b1, b, &b[70]); + sp_3072_mul_70(z2, &a[70], &b[70]); + sp_3072_mul_70(z0, a, b); + sp_3072_mul_70(z1, a1, b1); + sp_3072_sub_140(z1, z1, z2); + sp_3072_sub_140(z1, z1, z0); + sp_3072_add_140(r + 70, r + 70, z1); } /* Square a and put result in r. (r = a * a) @@ -4225,19 +4141,19 @@ SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. 
*/ -SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_140(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z1[136]; + sp_digit z1[140]; sp_digit* a1 = z1; - sp_digit* z2 = r + 136; - sp_3072_add_68(a1, a, &a[68]); - sp_3072_sqr_68(z2, &a[68]); - sp_3072_sqr_68(z0, a); - sp_3072_sqr_68(z1, a1); - sp_3072_sub_136(z1, z1, z2); - sp_3072_sub_136(z1, z1, z0); - sp_3072_add_136(r + 68, r + 68, z1); + sp_digit* z2 = r + 140; + sp_3072_add_70(a1, a, &a[70]); + sp_3072_sqr_70(z2, &a[70]); + sp_3072_sqr_70(z0, a); + sp_3072_sqr_70(z1, a1); + sp_3072_sub_140(z1, z1, z2); + sp_3072_sub_140(z1, z1, z0); + sp_3072_add_140(r + 70, r + 70, z1); } #endif /* WOLFSSL_SP_SMALL */ @@ -4248,12 +4164,12 @@ SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] + b[i]; return 0; @@ -4266,12 +4182,12 @@ SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] - b[i]; return 0; @@ -4285,30 +4201,30 @@ SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i, j, k; int64_t c; - c = ((int64_t)a[135]) * b[135]; - r[271] = (sp_digit)(c >> 23); - c = (c & 0x7fffff) << 23; - for (k = 269; k >= 0; k--) { - for (i = 135; i >= 0; i--) { + c = ((int64_t)a[139]) * b[139]; + r[279] = (sp_digit)(c >> 22); + c = (c & 0x3fffff) << 22; + for (k = 277; k >= 0; k--) { + for (i = 139; i >= 0; i--) { j = k - i; - if (j >= 136) + if (j >= 140) break; if (j < 0) continue; c += ((int64_t)a[i]) * b[j]; } - r[k + 2] += c >> 46; - r[k + 1] = (c >> 23) & 0x7fffff; - c = (c & 0x7fffff) << 23; + r[k + 2] += c >> 44; + r[k + 1] = (c >> 22) & 0x3fffff; + c = (c & 0x3fffff) << 22; } - r[0] = (sp_digit)(c >> 23); + r[0] = (sp_digit)(c >> 22); } /* Square a and put result in r. (r = a * a) @@ -4316,18 +4232,18 @@ SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. 
*/ -SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_140(sp_digit* r, const sp_digit* a) { int i, j, k; int64_t c; - c = ((int64_t)a[135]) * a[135]; - r[271] = (sp_digit)(c >> 23); - c = (c & 0x7fffff) << 23; - for (k = 269; k >= 0; k--) { - for (i = 135; i >= 0; i--) { + c = ((int64_t)a[139]) * a[139]; + r[279] = (sp_digit)(c >> 22); + c = (c & 0x3fffff) << 22; + for (k = 277; k >= 0; k--) { + for (i = 139; i >= 0; i--) { j = k - i; - if (j >= 136 || i <= j) + if (j >= 140 || i <= j) break; if (j < 0) continue; @@ -4337,11 +4253,11 @@ SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) if (i == j) c += ((int64_t)a[i]) * a[i]; - r[k + 2] += c >> 46; - r[k + 1] = (c >> 23) & 0x7fffff; - c = (c & 0x7fffff) << 23; + r[k + 2] += c >> 44; + r[k + 1] = (c >> 22) & 0x3fffff; + c = (c & 0x3fffff) << 22; } - r[0] = (sp_digit)(c >> 23); + r[0] = (sp_digit)(c >> 22); } #endif /* WOLFSSL_SP_SMALL */ @@ -4354,12 +4270,12 @@ SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] + b[i]; return 0; @@ -4372,12 +4288,12 @@ SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] - b[i]; return 0; @@ -4391,30 +4307,30 @@ SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i, j, k; int64_t c; - c = ((int64_t)a[67]) * b[67]; - r[135] = (sp_digit)(c >> 23); - c = (c & 0x7fffff) << 23; - for (k = 133; k >= 0; k--) { - for (i = 67; i >= 0; i--) { + c = ((int64_t)a[69]) * b[69]; + r[139] = (sp_digit)(c >> 22); + c = (c & 0x3fffff) << 22; + for (k = 137; k >= 0; k--) { + for (i = 69; i >= 0; i--) { j = k - i; - if (j >= 68) + if (j >= 70) break; if (j < 0) continue; c += ((int64_t)a[i]) * b[j]; } - r[k + 2] += c >> 46; - r[k + 1] = (c >> 23) & 0x7fffff; - c = (c & 0x7fffff) << 23; + r[k + 2] += c >> 44; + r[k + 1] = (c >> 22) & 0x3fffff; + c = (c & 0x3fffff) << 22; } - r[0] = (sp_digit)(c >> 23); + r[0] = (sp_digit)(c >> 22); } /* Square a and put result in r. (r = a * a) @@ -4422,18 +4338,18 @@ SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. 
*/ -SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_70(sp_digit* r, const sp_digit* a) { int i, j, k; int64_t c; - c = ((int64_t)a[67]) * a[67]; - r[135] = (sp_digit)(c >> 23); - c = (c & 0x7fffff) << 23; - for (k = 133; k >= 0; k--) { - for (i = 67; i >= 0; i--) { + c = ((int64_t)a[69]) * a[69]; + r[139] = (sp_digit)(c >> 22); + c = (c & 0x3fffff) << 22; + for (k = 137; k >= 0; k--) { + for (i = 69; i >= 0; i--) { j = k - i; - if (j >= 68 || i <= j) + if (j >= 70 || i <= j) break; if (j < 0) continue; @@ -4443,11 +4359,11 @@ SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) if (i == j) c += ((int64_t)a[i]) * a[i]; - r[k + 2] += c >> 46; - r[k + 1] = (c >> 23) & 0x7fffff; - c = (c & 0x7fffff) << 23; + r[k + 2] += c >> 44; + r[k + 1] = (c >> 22) & 0x3fffff; + c = (c & 0x3fffff) << 22; } - r[0] = (sp_digit)(c >> 23); + r[0] = (sp_digit)(c >> 22); } #endif /* WOLFSSL_SP_SMALL */ @@ -4467,10 +4383,10 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) x *= 2 - b * x; /* here x*a==1 mod 2**8 */ x *= 2 - b * x; /* here x*a==1 mod 2**16 */ x *= 2 - b * x; /* here x*a==1 mod 2**32 */ - x &= 0x7fffff; + x &= 0x3fffff; /* rho = -1/m mod b */ - *rho = (1L << 23) - x; + *rho = (1L << 22) - x; } /* Multiply a by scalar b into r. (r = a * b) @@ -4479,7 +4395,7 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) * a A single precision integer. * b A scalar. 
*/ -SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_d_140(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -4487,37 +4403,43 @@ SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 136; i++) { + for (i = 0; i < 140; i++) { t += tb * a[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[136] = (sp_digit)t; + r[140] = (sp_digit)t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] = t[0] & 0x3fffff; for (i = 0; i < 136; i += 8) { t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] = (sp_digit)(t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] = (sp_digit)(t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] = (sp_digit)(t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] = (sp_digit)(t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] = (sp_digit)(t[7] >> 22) + (t[0] & 0x3fffff); } - r[136] = (sp_digit)(t[7] >> 23); + t[1] = tb * a[137]; + r[137] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); + t[2] = tb * a[138]; + r[138] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); + t[3] = tb * a[139]; + r[139] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); + r[140] = 
(sp_digit)(t[3] >> 22); #endif /* WOLFSSL_SP_SMALL */ } @@ -4529,35 +4451,37 @@ SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a, * r A single precision number. * m A signle precision number. */ -static void sp_3072_mont_norm_68(sp_digit* r, sp_digit* m) +static void sp_3072_mont_norm_70(sp_digit* r, sp_digit* m) { /* Set r = 2^n - 1. */ #ifdef WOLFSSL_SP_SMALL int i; - for (i=0; i<67; i++) - r[i] = 0x7fffff; + for (i=0; i<69; i++) + r[i] = 0x3fffff; #else int i; for (i = 0; i < 64; i += 8) { - r[i + 0] = 0x7fffff; - r[i + 1] = 0x7fffff; - r[i + 2] = 0x7fffff; - r[i + 3] = 0x7fffff; - r[i + 4] = 0x7fffff; - r[i + 5] = 0x7fffff; - r[i + 6] = 0x7fffff; - r[i + 7] = 0x7fffff; + r[i + 0] = 0x3fffff; + r[i + 1] = 0x3fffff; + r[i + 2] = 0x3fffff; + r[i + 3] = 0x3fffff; + r[i + 4] = 0x3fffff; + r[i + 5] = 0x3fffff; + r[i + 6] = 0x3fffff; + r[i + 7] = 0x3fffff; } - r[64] = 0x7fffff; - r[65] = 0x7fffff; - r[66] = 0x7fffff; + r[64] = 0x3fffff; + r[65] = 0x3fffff; + r[66] = 0x3fffff; + r[67] = 0x3fffff; + r[68] = 0x3fffff; #endif - r[67] = 0x3ffffl; + r[69] = 0x3ffffl; /* r = (2^n - 1) mod n */ - sp_3072_sub_68(r, r, m); + sp_3072_sub_70(r, r, m); /* Add one so r = 2^n mod m */ r[0] += 1; @@ -4570,17 +4494,19 @@ static void sp_3072_mont_norm_68(sp_digit* r, sp_digit* m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_digit sp_3072_cmp_68(const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_cmp_70(const sp_digit* a, const sp_digit* b) { sp_digit r = 0; #ifdef WOLFSSL_SP_SMALL int i; - for (i=67; i>=0; i--) + for (i=69; i>=0; i--) r |= (a[i] - b[i]) & (0 - !r); #else int i; + r |= (a[69] - b[69]) & (0 - !r); + r |= (a[68] - b[68]) & (0 - !r); r |= (a[67] - b[67]) & (0 - !r); r |= (a[66] - b[66]) & (0 - !r); r |= (a[65] - b[65]) & (0 - !r); @@ -4608,13 +4534,13 @@ static sp_digit sp_3072_cmp_68(const sp_digit* a, const sp_digit* b) * b A single precision number to subtract. 
* m Mask value to apply. */ -static void sp_3072_cond_sub_68(sp_digit* r, const sp_digit* a, +static void sp_3072_cond_sub_70(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] - (b[i] & m); #else int i; @@ -4633,6 +4559,8 @@ static void sp_3072_cond_sub_68(sp_digit* r, const sp_digit* a, r[65] = a[65] - (b[65] & m); r[66] = a[66] - (b[66] & m); r[67] = a[67] - (b[67] & m); + r[68] = a[68] - (b[68] & m); + r[69] = a[69] - (b[69] & m); #endif /* WOLFSSL_SP_SMALL */ } @@ -4642,7 +4570,7 @@ static void sp_3072_cond_sub_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A scalar. */ -SP_NOINLINE static void sp_3072_mul_add_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_add_70(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -4650,74 +4578,80 @@ SP_NOINLINE static void sp_3072_mul_add_68(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 68; i++) { + for (i = 0; i < 70; i++) { t += (tb * a[i]) + r[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[68] += t; + r[70] += t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += t[0] & 0x3fffff; for (i = 0; i < 64; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] 
& 0x7fffff); + r[i+6] += (t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (t[7] >> 22) + (t[0] & 0x3fffff); } - t[1] = tb * a[65]; r[65] += (t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[66]; r[66] += (t[1] >> 23) + (t[2] & 0x7fffff); - t[3] = tb * a[67]; r[67] += (t[2] >> 23) + (t[3] & 0x7fffff); - r[68] += t[3] >> 23; + t[1] = tb * a[65]; r[65] += (t[0] >> 22) + (t[1] & 0x3fffff); + t[2] = tb * a[66]; r[66] += (t[1] >> 22) + (t[2] & 0x3fffff); + t[3] = tb * a[67]; r[67] += (t[2] >> 22) + (t[3] & 0x3fffff); + t[4] = tb * a[68]; r[68] += (t[3] >> 22) + (t[4] & 0x3fffff); + t[5] = tb * a[69]; r[69] += (t[4] >> 22) + (t[5] & 0x3fffff); + r[70] += t[5] >> 22; #endif /* WOLFSSL_SP_SMALL */ } -/* Normalize the values in each word to 23. +/* Normalize the values in each word to 22. * * a Array of sp_digit to normalize. 
*/ -static void sp_3072_norm_68(sp_digit* a) +static void sp_3072_norm_70(sp_digit* a) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 67; i++) { - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + for (i = 0; i < 69; i++) { + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; } #else int i; for (i = 0; i < 64; i += 8) { - a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff; - a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff; - a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff; - a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff; - a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff; - a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff; - a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff; - a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff; - a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff; + a[i+1] += a[i+0] >> 22; a[i+0] &= 0x3fffff; + a[i+2] += a[i+1] >> 22; a[i+1] &= 0x3fffff; + a[i+3] += a[i+2] >> 22; a[i+2] &= 0x3fffff; + a[i+4] += a[i+3] >> 22; a[i+3] &= 0x3fffff; + a[i+5] += a[i+4] >> 22; a[i+4] &= 0x3fffff; + a[i+6] += a[i+5] >> 22; a[i+5] &= 0x3fffff; + a[i+7] += a[i+6] >> 22; a[i+6] &= 0x3fffff; + a[i+8] += a[i+7] >> 22; a[i+7] &= 0x3fffff; + a[i+9] += a[i+8] >> 22; a[i+8] &= 0x3fffff; } - a[64+1] += a[64] >> 23; - a[64] &= 0x7fffff; - a[65+1] += a[65] >> 23; - a[65] &= 0x7fffff; - a[66+1] += a[66] >> 23; - a[66] &= 0x7fffff; + a[64+1] += a[64] >> 22; + a[64] &= 0x3fffff; + a[65+1] += a[65] >> 22; + a[65] &= 0x3fffff; + a[66+1] += a[66] >> 22; + a[66] &= 0x3fffff; + a[67+1] += a[67] >> 22; + a[67] &= 0x3fffff; + a[68+1] += a[68] >> 22; + a[68] &= 0x3fffff; #endif } @@ -4726,54 +4660,58 @@ static void sp_3072_norm_68(sp_digit* a) * r A single precision number. * a A single precision number. 
*/ -static void sp_3072_mont_shift_68(sp_digit* r, const sp_digit* a) +static void sp_3072_mont_shift_70(sp_digit* r, const sp_digit* a) { #ifdef WOLFSSL_SP_SMALL int i; sp_digit n, s; - s = a[68]; - n = a[67] >> 18; - for (i = 0; i < 67; i++) { - n += (s & 0x7fffff) << 5; - r[i] = n & 0x7fffff; - n >>= 23; - s = a[69 + i] + (s >> 23); + s = a[70]; + n = a[69] >> 18; + for (i = 0; i < 69; i++) { + n += (s & 0x3fffff) << 4; + r[i] = n & 0x3fffff; + n >>= 22; + s = a[71 + i] + (s >> 22); } - n += s << 5; - r[67] = n; + n += s << 4; + r[69] = n; #else sp_digit n, s; int i; - s = a[68]; n = a[67] >> 18; + s = a[70]; n = a[69] >> 18; for (i = 0; i < 64; i += 8) { - n += (s & 0x7fffff) << 5; r[i+0] = n & 0x7fffff; - n >>= 23; s = a[i+69] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+1] = n & 0x7fffff; - n >>= 23; s = a[i+70] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+2] = n & 0x7fffff; - n >>= 23; s = a[i+71] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+3] = n & 0x7fffff; - n >>= 23; s = a[i+72] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+4] = n & 0x7fffff; - n >>= 23; s = a[i+73] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+5] = n & 0x7fffff; - n >>= 23; s = a[i+74] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+6] = n & 0x7fffff; - n >>= 23; s = a[i+75] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+7] = n & 0x7fffff; - n >>= 23; s = a[i+76] + (s >> 23); + n += (s & 0x3fffff) << 4; r[i+0] = n & 0x3fffff; + n >>= 22; s = a[i+71] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+1] = n & 0x3fffff; + n >>= 22; s = a[i+72] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+2] = n & 0x3fffff; + n >>= 22; s = a[i+73] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+3] = n & 0x3fffff; + n >>= 22; s = a[i+74] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+4] = n & 0x3fffff; + n >>= 22; s = a[i+75] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+5] = n & 0x3fffff; + n >>= 22; s = a[i+76] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+6] = n & 0x3fffff; + n >>= 22; s = a[i+77] + (s >> 22); + n += (s & 
0x3fffff) << 4; r[i+7] = n & 0x3fffff; + n >>= 22; s = a[i+78] + (s >> 22); } - n += (s & 0x7fffff) << 5; r[64] = n & 0x7fffff; - n >>= 23; s = a[133] + (s >> 23); - n += (s & 0x7fffff) << 5; r[65] = n & 0x7fffff; - n >>= 23; s = a[134] + (s >> 23); - n += (s & 0x7fffff) << 5; r[66] = n & 0x7fffff; - n >>= 23; s = a[135] + (s >> 23); - n += s << 5; r[67] = n; + n += (s & 0x3fffff) << 4; r[64] = n & 0x3fffff; + n >>= 22; s = a[135] + (s >> 22); + n += (s & 0x3fffff) << 4; r[65] = n & 0x3fffff; + n >>= 22; s = a[136] + (s >> 22); + n += (s & 0x3fffff) << 4; r[66] = n & 0x3fffff; + n >>= 22; s = a[137] + (s >> 22); + n += (s & 0x3fffff) << 4; r[67] = n & 0x3fffff; + n >>= 22; s = a[138] + (s >> 22); + n += (s & 0x3fffff) << 4; r[68] = n & 0x3fffff; + n >>= 22; s = a[139] + (s >> 22); + n += s << 4; r[69] = n; #endif /* WOLFSSL_SP_SMALL */ - XMEMSET(&r[68], 0, sizeof(*r) * 68); + XMEMSET(&r[70], 0, sizeof(*r) * 70); } /* Reduce the number back to 3072 bits using Montgomery reduction. @@ -4782,24 +4720,24 @@ static void sp_3072_mont_shift_68(sp_digit* r, const sp_digit* a) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -static void sp_3072_mont_reduce_68(sp_digit* a, sp_digit* m, sp_digit mp) +static void sp_3072_mont_reduce_70(sp_digit* a, sp_digit* m, sp_digit mp) { int i; sp_digit mu; - for (i=0; i<67; i++) { - mu = (a[i] * mp) & 0x7fffff; - sp_3072_mul_add_68(a+i, m, mu); - a[i+1] += a[i] >> 23; + for (i=0; i<69; i++) { + mu = (a[i] * mp) & 0x3fffff; + sp_3072_mul_add_70(a+i, m, mu); + a[i+1] += a[i] >> 22; } mu = (a[i] * mp) & 0x3ffffl; - sp_3072_mul_add_68(a+i, m, mu); - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + sp_3072_mul_add_70(a+i, m, mu); + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; - sp_3072_mont_shift_68(a, a); - sp_3072_cond_sub_68(a, a, m, 0 - ((a[67] >> 18) > 0)); - sp_3072_norm_68(a); + sp_3072_mont_shift_70(a, a); + sp_3072_cond_sub_70(a, a, m, 0 - ((a[69] >> 18) > 0)); + sp_3072_norm_70(a); } /* Multiply two Montogmery form numbers mod the modulus (prime). @@ -4811,11 +4749,11 @@ static void sp_3072_mont_reduce_68(sp_digit* a, sp_digit* m, sp_digit mp) * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_3072_mont_mul_68(sp_digit* r, sp_digit* a, sp_digit* b, +static void sp_3072_mont_mul_70(sp_digit* r, sp_digit* a, sp_digit* b, sp_digit* m, sp_digit mp) { - sp_3072_mul_68(r, a, b); - sp_3072_mont_reduce_68(r, m, mp); + sp_3072_mul_70(r, a, b); + sp_3072_mont_reduce_70(r, m, mp); } /* Square the Montgomery form number. (r = a * a mod m) @@ -4825,11 +4763,11 @@ static void sp_3072_mont_mul_68(sp_digit* r, sp_digit* a, sp_digit* b, * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_3072_mont_sqr_68(sp_digit* r, sp_digit* a, sp_digit* m, +static void sp_3072_mont_sqr_70(sp_digit* r, sp_digit* a, sp_digit* m, sp_digit mp) { - sp_3072_sqr_68(r, a); - sp_3072_mont_reduce_68(r, m, mp); + sp_3072_sqr_70(r, a); + sp_3072_mont_reduce_70(r, m, mp); } /* Multiply a by scalar b into r. (r = a * b) @@ -4838,7 +4776,7 @@ static void sp_3072_mont_sqr_68(sp_digit* r, sp_digit* a, sp_digit* m, * a A single precision integer. * b A scalar. 
*/ -SP_NOINLINE static void sp_3072_mul_d_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_d_70(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -4846,43 +4784,47 @@ SP_NOINLINE static void sp_3072_mul_d_68(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 68; i++) { + for (i = 0; i < 70; i++) { t += tb * a[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[68] = (sp_digit)t; + r[70] = (sp_digit)t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] = t[0] & 0x3fffff; for (i = 0; i < 64; i += 8) { t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] = (sp_digit)(t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] = (sp_digit)(t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] = (sp_digit)(t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] = (sp_digit)(t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] = (sp_digit)(t[7] >> 22) + (t[0] & 0x3fffff); } t[1] = tb * a[65]; - r[65] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + r[65] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[66]; - r[66] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + r[66] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[67]; - r[67] = 
(sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); - r[68] = (sp_digit)(t[3] >> 23); + r[67] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); + t[4] = tb * a[68]; + r[68] = (sp_digit)(t[3] >> 22) + (t[4] & 0x3fffff); + t[5] = tb * a[69]; + r[69] = (sp_digit)(t[4] >> 22) + (t[5] & 0x3fffff); + r[70] = (sp_digit)(t[5] >> 22); #endif /* WOLFSSL_SP_SMALL */ } @@ -4894,13 +4836,13 @@ SP_NOINLINE static void sp_3072_mul_d_68(sp_digit* r, const sp_digit* a, * b A single precision number to add. * m Mask value to apply. */ -static void sp_3072_cond_add_68(sp_digit* r, const sp_digit* a, +static void sp_3072_cond_add_70(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] + (b[i] & m); #else int i; @@ -4919,6 +4861,8 @@ static void sp_3072_cond_add_68(sp_digit* r, const sp_digit* a, r[65] = a[65] + (b[65] & m); r[66] = a[66] + (b[66] & m); r[67] = a[67] + (b[67] & m); + r[68] = a[68] + (b[68] & m); + r[69] = a[69] + (b[69] & m); #endif /* WOLFSSL_SP_SMALL */ } @@ -4929,12 +4873,12 @@ static void sp_3072_cond_add_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] - b[i]; return 0; @@ -4948,12 +4892,12 @@ SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] + b[i]; return 0; @@ -4968,7 +4912,7 @@ SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, * r Remainder from the division. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m, +static int sp_3072_div_70(sp_digit* a, sp_digit* d, sp_digit* m, sp_digit* r) { int i; @@ -4977,18 +4921,18 @@ static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m, #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) sp_digit* td; #else - sp_digit t1d[136], t2d[68 + 1]; + sp_digit t1d[140], t2d[70 + 1]; #endif sp_digit* t1; sp_digit* t2; int err = MP_OKAY; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) - td = XMALLOC(sizeof(sp_digit) * (3 * 68 + 1), NULL, + td = XMALLOC(sizeof(sp_digit) * (3 * 70 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td != NULL) { t1 = td; - t2 = td + 2 * 68; + t2 = td + 2 * 70; } else err = MEMORY_E; @@ -5000,41 +4944,41 @@ static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m, (void)m; if (err == MP_OKAY) { - div = d[67]; - XMEMCPY(t1, a, sizeof(*t1) * 2 * 68); - for (i=67; i>=0; i--) { - t1[68 + i] += t1[68 + i - 1] >> 23; - t1[68 + i - 1] &= 0x7fffff; - d1 = t1[68 + i]; - d1 <<= 23; - d1 += t1[68 + i - 1]; + div = d[69]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 70); + for (i=69; i>=0; i--) { + t1[70 + i] += t1[70 + i - 1] >> 22; + t1[70 + i - 1] &= 0x3fffff; + d1 = t1[70 + i]; + d1 <<= 22; + d1 += t1[70 + i - 1]; r1 = (sp_digit)(d1 / div); - sp_3072_mul_d_68(t2, d, r1); - sp_3072_sub_68(&t1[i], &t1[i], t2); - t1[68 + i] -= t2[68]; - t1[68 + i] += t1[68 + i - 1] >> 23; - t1[68 + i - 1] &= 0x7fffff; - r1 = (((-t1[68 + i]) << 23) - t1[68 + i - 1]) / div; + sp_3072_mul_d_70(t2, d, r1); + 
sp_3072_sub_70(&t1[i], &t1[i], t2); + t1[70 + i] -= t2[70]; + t1[70 + i] += t1[70 + i - 1] >> 22; + t1[70 + i - 1] &= 0x3fffff; + r1 = (((-t1[70 + i]) << 22) - t1[70 + i - 1]) / div; r1++; - sp_3072_mul_d_68(t2, d, r1); - sp_3072_add_68(&t1[i], &t1[i], t2); - t1[68 + i] += t1[68 + i - 1] >> 23; - t1[68 + i - 1] &= 0x7fffff; + sp_3072_mul_d_70(t2, d, r1); + sp_3072_add_70(&t1[i], &t1[i], t2); + t1[70 + i] += t1[70 + i - 1] >> 22; + t1[70 + i - 1] &= 0x3fffff; } - t1[68 - 1] += t1[68 - 2] >> 23; - t1[68 - 2] &= 0x7fffff; - d1 = t1[68 - 1]; + t1[70 - 1] += t1[70 - 2] >> 22; + t1[70 - 2] &= 0x3fffff; + d1 = t1[70 - 1]; r1 = (sp_digit)(d1 / div); - sp_3072_mul_d_68(t2, d, r1); - sp_3072_sub_68(t1, t1, t2); - XMEMCPY(r, t1, sizeof(*r) * 2 * 68); - for (i=0; i<66; i++) { - r[i+1] += r[i] >> 23; - r[i] &= 0x7fffff; + sp_3072_mul_d_70(t2, d, r1); + sp_3072_sub_70(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2 * 70); + for (i=0; i<68; i++) { + r[i+1] += r[i] >> 22; + r[i] &= 0x3fffff; } - sp_3072_cond_add_68(r, r, d, 0 - (r[67] < 0)); + sp_3072_cond_add_70(r, r, d, 0 - (r[69] < 0)); } #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) @@ -5052,9 +4996,9 @@ static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m, * m A single precision number that is the modulus to reduce with. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_3072_mod_68(sp_digit* r, sp_digit* a, sp_digit* m) +static int sp_3072_mod_70(sp_digit* r, sp_digit* a, sp_digit* m) { - return sp_3072_div_68(a, m, NULL, r); + return sp_3072_div_70(a, m, NULL, r); } /* Modular exponentiate a to the e mod m. (r = a^e mod m) @@ -5066,7 +5010,7 @@ static int sp_3072_mod_68(sp_digit* r, sp_digit* a, sp_digit* m) * m A single precision number that is the modulus. * returns 0 on success and MEMORY_E on dynamic memory allocation failure. 
*/ -static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, +static int sp_3072_mod_exp_70(sp_digit* r, sp_digit* a, sp_digit* e, int bits, sp_digit* m, int reduceA) { #ifdef WOLFSSL_SP_SMALL @@ -5079,62 +5023,62 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int c, y; int err = MP_OKAY; - td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 68 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 70 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { - XMEMSET(td, 0, sizeof(*td) * 3 * 68 * 2); + XMEMSET(td, 0, sizeof(*td) * 3 * 70 * 2); norm = t[0] = td; - t[1] = &td[68 * 2]; - t[2] = &td[2 * 68 * 2]; + t[1] = &td[70 * 2]; + t[2] = &td[2 * 70 * 2]; sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_68(norm, m); + sp_3072_mont_norm_70(norm, m); if (reduceA) - err = sp_3072_mod_68(t[1], a, m); + err = sp_3072_mod_70(t[1], a, m); else - XMEMCPY(t[1], a, sizeof(sp_digit) * 68); + XMEMCPY(t[1], a, sizeof(sp_digit) * 70); } if (err == MP_OKAY) { - sp_3072_mul_68(t[1], t[1], norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], t[1], norm); + err = sp_3072_mod_70(t[1], t[1], m); } if (err == MP_OKAY) { - i = bits / 23; - c = bits % 23; - n = e[i--] << (23 - c); + i = bits / 22; + c = bits % 22; + n = e[i--] << (22 - c); for (; ; c--) { if (c == 0) { if (i == -1) break; n = e[i--]; - c = 23; + c = 22; } - y = (n >> 22) & 1; + y = (n >> 21) & 1; n <<= 1; - sp_3072_mont_mul_68(t[y^1], t[0], t[1], m, mp); + sp_3072_mont_mul_70(t[y^1], t[0], t[1], m, mp); XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), - sizeof(*t[2]) * 68 * 2); - sp_3072_mont_sqr_68(t[2], t[2], m, mp); + sizeof(*t[2]) * 70 * 2); + sp_3072_mont_sqr_70(t[2], t[2], m, mp); XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), t[2], - sizeof(*t[2]) * 68 * 2); + sizeof(*t[2]) * 70 * 2); } - sp_3072_mont_reduce_68(t[0], m, mp); - n = 
sp_3072_cmp_68(t[0], m); - sp_3072_cond_sub_68(t[0], t[0], m, (n < 0) - 1); - XMEMCPY(r, t[0], sizeof(*r) * 68 * 2); + sp_3072_mont_reduce_70(t[0], m, mp); + n = sp_3072_cmp_70(t[0], m); + sp_3072_cond_sub_70(t[0], t[0], m, (n < 0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 70 * 2); } @@ -5144,7 +5088,7 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, return err; #elif defined(WOLFSSL_SP_CACHE_RESISTANT) #ifndef WOLFSSL_SMALL_STACK - sp_digit t[3][136]; + sp_digit t[3][140]; #else sp_digit* td; sp_digit* t[3]; @@ -5157,15 +5101,15 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int err = MP_OKAY; #ifdef WOLFSSL_SMALL_STACK - td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 68 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 70 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { t[0] = td; - t[1] = &td[68 * 2]; - t[2] = &td[2 * 68 * 2]; + t[1] = &td[70 * 2]; + t[2] = &td[2 * 70 * 2]; norm = t[0]; } #else @@ -5174,49 +5118,49 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, if (err == MP_OKAY) { sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_68(norm, m); + sp_3072_mont_norm_70(norm, m); if (reduceA) { - err = sp_3072_mod_68(t[1], a, m); + err = sp_3072_mod_70(t[1], a, m); if (err == MP_OKAY) { - sp_3072_mul_68(t[1], t[1], norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], t[1], norm); + err = sp_3072_mod_70(t[1], t[1], m); } } else { - sp_3072_mul_68(t[1], a, norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], a, norm); + err = sp_3072_mod_70(t[1], t[1], m); } } if (err == MP_OKAY) { - i = bits / 23; - c = bits % 23; - n = e[i--] << (23 - c); + i = bits / 22; + c = bits % 22; + n = e[i--] << (22 - c); for (; ; c--) { if (c == 0) { if (i == -1) break; n = e[i--]; - c = 23; + c = 22; } - y = (n >> 22) & 1; + y = (n >> 21) & 1; n <<= 1; - sp_3072_mont_mul_68(t[y^1], t[0], t[1], m, mp); + 
sp_3072_mont_mul_70(t[y^1], t[0], t[1], m, mp); XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); - sp_3072_mont_sqr_68(t[2], t[2], m, mp); + sp_3072_mont_sqr_70(t[2], t[2], m, mp); XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); } - sp_3072_mont_reduce_68(t[0], m, mp); - n = sp_3072_cmp_68(t[0], m); - sp_3072_cond_sub_68(t[0], t[0], m, (n < 0) - 1); + sp_3072_mont_reduce_70(t[0], m, mp); + n = sp_3072_cmp_70(t[0], m); + sp_3072_cond_sub_70(t[0], t[0], m, (n < 0) - 1); XMEMCPY(r, t[0], sizeof(t[0])); } @@ -5228,13 +5172,13 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, return err; #else #ifndef WOLFSSL_SMALL_STACK - sp_digit t[32][136]; + sp_digit t[32][140]; #else sp_digit* t[32]; sp_digit* td; #endif sp_digit* norm; - sp_digit rt[136]; + sp_digit rt[140]; sp_digit mp = 1; sp_digit n; int i; @@ -5242,14 +5186,14 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int err = MP_OKAY; #ifdef WOLFSSL_SMALL_STACK - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 136, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 140, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { for (i=0; i<32; i++) - t[i] = td + i * 136; + t[i] = td + i * 140; norm = t[0]; } #else @@ -5258,67 +5202,67 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, if (err == MP_OKAY) { sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_68(norm, m); + sp_3072_mont_norm_70(norm, m); if (reduceA) { - err = sp_3072_mod_68(t[1], a, m); + err = sp_3072_mod_70(t[1], a, m); if (err == MP_OKAY) { - sp_3072_mul_68(t[1], t[1], norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], t[1], norm); + err = sp_3072_mod_70(t[1], t[1], m); } } else { - sp_3072_mul_68(t[1], a, norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], a, norm); + err = 
sp_3072_mod_70(t[1], t[1], m); } } if (err == MP_OKAY) { - sp_3072_mont_sqr_68(t[ 2], t[ 1], m, mp); - sp_3072_mont_mul_68(t[ 3], t[ 2], t[ 1], m, mp); - sp_3072_mont_sqr_68(t[ 4], t[ 2], m, mp); - sp_3072_mont_mul_68(t[ 5], t[ 3], t[ 2], m, mp); - sp_3072_mont_sqr_68(t[ 6], t[ 3], m, mp); - sp_3072_mont_mul_68(t[ 7], t[ 4], t[ 3], m, mp); - sp_3072_mont_sqr_68(t[ 8], t[ 4], m, mp); - sp_3072_mont_mul_68(t[ 9], t[ 5], t[ 4], m, mp); - sp_3072_mont_sqr_68(t[10], t[ 5], m, mp); - sp_3072_mont_mul_68(t[11], t[ 6], t[ 5], m, mp); - sp_3072_mont_sqr_68(t[12], t[ 6], m, mp); - sp_3072_mont_mul_68(t[13], t[ 7], t[ 6], m, mp); - sp_3072_mont_sqr_68(t[14], t[ 7], m, mp); - sp_3072_mont_mul_68(t[15], t[ 8], t[ 7], m, mp); - sp_3072_mont_sqr_68(t[16], t[ 8], m, mp); - sp_3072_mont_mul_68(t[17], t[ 9], t[ 8], m, mp); - sp_3072_mont_sqr_68(t[18], t[ 9], m, mp); - sp_3072_mont_mul_68(t[19], t[10], t[ 9], m, mp); - sp_3072_mont_sqr_68(t[20], t[10], m, mp); - sp_3072_mont_mul_68(t[21], t[11], t[10], m, mp); - sp_3072_mont_sqr_68(t[22], t[11], m, mp); - sp_3072_mont_mul_68(t[23], t[12], t[11], m, mp); - sp_3072_mont_sqr_68(t[24], t[12], m, mp); - sp_3072_mont_mul_68(t[25], t[13], t[12], m, mp); - sp_3072_mont_sqr_68(t[26], t[13], m, mp); - sp_3072_mont_mul_68(t[27], t[14], t[13], m, mp); - sp_3072_mont_sqr_68(t[28], t[14], m, mp); - sp_3072_mont_mul_68(t[29], t[15], t[14], m, mp); - sp_3072_mont_sqr_68(t[30], t[15], m, mp); - sp_3072_mont_mul_68(t[31], t[16], t[15], m, mp); + sp_3072_mont_sqr_70(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_70(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_70(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_70(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_70(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_70(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_70(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_70(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_70(t[10], t[ 5], m, mp); + sp_3072_mont_mul_70(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_70(t[12], t[ 6], m, 
mp); + sp_3072_mont_mul_70(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_70(t[14], t[ 7], m, mp); + sp_3072_mont_mul_70(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_70(t[16], t[ 8], m, mp); + sp_3072_mont_mul_70(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_70(t[18], t[ 9], m, mp); + sp_3072_mont_mul_70(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_70(t[20], t[10], m, mp); + sp_3072_mont_mul_70(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_70(t[22], t[11], m, mp); + sp_3072_mont_mul_70(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_70(t[24], t[12], m, mp); + sp_3072_mont_mul_70(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_70(t[26], t[13], m, mp); + sp_3072_mont_mul_70(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_70(t[28], t[14], m, mp); + sp_3072_mont_mul_70(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_70(t[30], t[15], m, mp); + sp_3072_mont_mul_70(t[31], t[16], t[15], m, mp); bits = ((bits + 4) / 5) * 5; - i = ((bits + 22) / 23) - 1; - c = bits % 23; + i = ((bits + 21) / 22) - 1; + c = bits % 22; if (c == 0) - c = 23; - if (i < 68) + c = 22; + if (i < 70) n = e[i--] << (32 - c); else { n = 0; i--; } if (c < 5) { - n |= e[i--] << (9 - c); - c += 23; + n |= e[i--] << (10 - c); + c += 22; } y = (n >> 27) & 0x1f; n <<= 5; @@ -5326,25 +5270,25 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, XMEMCPY(rt, t[y], sizeof(rt)); for (; i>=0 || c>=5; ) { if (c < 5) { - n |= e[i--] << (9 - c); - c += 23; + n |= e[i--] << (10 - c); + c += 22; } y = (n >> 27) & 0x1f; n <<= 5; c -= 5; - sp_3072_mont_sqr_68(rt, rt, m, mp); - sp_3072_mont_sqr_68(rt, rt, m, mp); - sp_3072_mont_sqr_68(rt, rt, m, mp); - sp_3072_mont_sqr_68(rt, rt, m, mp); - sp_3072_mont_sqr_68(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); - sp_3072_mont_mul_68(rt, rt, t[y], m, mp); + sp_3072_mont_mul_70(rt, 
rt, t[y], m, mp); } - sp_3072_mont_reduce_68(rt, m, mp); - n = sp_3072_cmp_68(rt, m); - sp_3072_cond_sub_68(rt, rt, m, (n < 0) - 1); + sp_3072_mont_reduce_70(rt, m, mp); + n = sp_3072_cmp_70(rt, m); + sp_3072_cond_sub_70(rt, rt, m, (n < 0) - 1); XMEMCPY(r, rt, sizeof(rt)); } @@ -5365,32 +5309,35 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, * r A single precision number. * m A signle precision number. */ -static void sp_3072_mont_norm_136(sp_digit* r, sp_digit* m) +static void sp_3072_mont_norm_140(sp_digit* r, sp_digit* m) { /* Set r = 2^n - 1. */ #ifdef WOLFSSL_SP_SMALL int i; - for (i=0; i<135; i++) - r[i] = 0x7fffff; + for (i=0; i<139; i++) + r[i] = 0x3fffff; #else int i; for (i = 0; i < 136; i += 8) { - r[i + 0] = 0x7fffff; - r[i + 1] = 0x7fffff; - r[i + 2] = 0x7fffff; - r[i + 3] = 0x7fffff; - r[i + 4] = 0x7fffff; - r[i + 5] = 0x7fffff; - r[i + 6] = 0x7fffff; - r[i + 7] = 0x7fffff; + r[i + 0] = 0x3fffff; + r[i + 1] = 0x3fffff; + r[i + 2] = 0x3fffff; + r[i + 3] = 0x3fffff; + r[i + 4] = 0x3fffff; + r[i + 5] = 0x3fffff; + r[i + 6] = 0x3fffff; + r[i + 7] = 0x3fffff; } + r[136] = 0x3fffff; + r[137] = 0x3fffff; + r[138] = 0x3fffff; #endif - r[135] = 0x1fffl; + r[139] = 0x3fffl; /* r = (2^n - 1) mod n */ - sp_3072_sub_136(r, r, m); + sp_3072_sub_140(r, r, m); /* Add one so r = 2^n mod m */ r[0] += 1; @@ -5403,17 +5350,21 @@ static void sp_3072_mont_norm_136(sp_digit* r, sp_digit* m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. 
*/ -static sp_digit sp_3072_cmp_136(const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_cmp_140(const sp_digit* a, const sp_digit* b) { sp_digit r = 0; #ifdef WOLFSSL_SP_SMALL int i; - for (i=135; i>=0; i--) + for (i=139; i>=0; i--) r |= (a[i] - b[i]) & (0 - !r); #else int i; + r |= (a[139] - b[139]) & (0 - !r); + r |= (a[138] - b[138]) & (0 - !r); + r |= (a[137] - b[137]) & (0 - !r); + r |= (a[136] - b[136]) & (0 - !r); for (i = 128; i >= 0; i -= 8) { r |= (a[i + 7] - b[i + 7]) & (0 - !r); r |= (a[i + 6] - b[i + 6]) & (0 - !r); @@ -5437,13 +5388,13 @@ static sp_digit sp_3072_cmp_136(const sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -static void sp_3072_cond_sub_136(sp_digit* r, const sp_digit* a, +static void sp_3072_cond_sub_140(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] - (b[i] & m); #else int i; @@ -5458,6 +5409,10 @@ static void sp_3072_cond_sub_136(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] - (b[i + 6] & m); r[i + 7] = a[i + 7] - (b[i + 7] & m); } + r[136] = a[136] - (b[136] & m); + r[137] = a[137] - (b[137] & m); + r[138] = a[138] - (b[138] & m); + r[139] = a[139] - (b[139] & m); #endif /* WOLFSSL_SP_SMALL */ } @@ -5467,7 +5422,7 @@ static void sp_3072_cond_sub_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A scalar. 
*/ -SP_NOINLINE static void sp_3072_mul_add_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_add_140(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -5475,79 +5430,74 @@ SP_NOINLINE static void sp_3072_mul_add_136(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 136; i++) { + for (i = 0; i < 140; i++) { t += (tb * a[i]) + r[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[136] += t; + r[140] += t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += t[0] & 0x3fffff; for (i = 0; i < 136; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (t[7] >> 22) + (t[0] & 0x3fffff); } - r[136] += t[7] >> 23; + t[1] = tb * a[137]; r[137] += (t[0] >> 22) + (t[1] & 0x3fffff); + t[2] = tb * a[138]; r[138] += (t[1] >> 22) + (t[2] & 0x3fffff); + t[3] = tb * a[139]; r[139] += (t[2] >> 22) + (t[3] & 0x3fffff); + r[140] += t[3] >> 22; #endif /* WOLFSSL_SP_SMALL */ } -/* Normalize the values in each word to 23. +/* Normalize the values in each word to 22. * * a Array of sp_digit to normalize. 
*/ -static void sp_3072_norm_136(sp_digit* a) +static void sp_3072_norm_140(sp_digit* a) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 135; i++) { - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + for (i = 0; i < 139; i++) { + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; } #else int i; - for (i = 0; i < 128; i += 8) { - a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff; - a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff; - a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff; - a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff; - a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff; - a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff; - a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff; - a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff; - a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff; + for (i = 0; i < 136; i += 8) { + a[i+1] += a[i+0] >> 22; a[i+0] &= 0x3fffff; + a[i+2] += a[i+1] >> 22; a[i+1] &= 0x3fffff; + a[i+3] += a[i+2] >> 22; a[i+2] &= 0x3fffff; + a[i+4] += a[i+3] >> 22; a[i+3] &= 0x3fffff; + a[i+5] += a[i+4] >> 22; a[i+4] &= 0x3fffff; + a[i+6] += a[i+5] >> 22; a[i+5] &= 0x3fffff; + a[i+7] += a[i+6] >> 22; a[i+6] &= 0x3fffff; + a[i+8] += a[i+7] >> 22; a[i+7] &= 0x3fffff; + a[i+9] += a[i+8] >> 22; a[i+8] &= 0x3fffff; } - a[128+1] += a[128] >> 23; - a[128] &= 0x7fffff; - a[129+1] += a[129] >> 23; - a[129] &= 0x7fffff; - a[130+1] += a[130] >> 23; - a[130] &= 0x7fffff; - a[131+1] += a[131] >> 23; - a[131] &= 0x7fffff; - a[132+1] += a[132] >> 23; - a[132] &= 0x7fffff; - a[133+1] += a[133] >> 23; - a[133] &= 0x7fffff; - a[134+1] += a[134] >> 23; - a[134] &= 0x7fffff; + a[136+1] += a[136] >> 22; + a[136] &= 0x3fffff; + a[137+1] += a[137] >> 22; + a[137] &= 0x3fffff; + a[138+1] += a[138] >> 22; + a[138] &= 0x3fffff; #endif } @@ -5556,44 +5506,54 @@ static void sp_3072_norm_136(sp_digit* a) * r A single precision number. * a A single precision number. 
*/ -static void sp_3072_mont_shift_136(sp_digit* r, const sp_digit* a) +static void sp_3072_mont_shift_140(sp_digit* r, const sp_digit* a) { #ifdef WOLFSSL_SP_SMALL int i; - int64_t n = a[135] >> 13; - n += ((int64_t)a[136]) << 10; + sp_digit n, s; - for (i = 0; i < 135; i++) { - r[i] = n & 0x7fffff; - n >>= 23; - n += ((int64_t)a[137 + i]) << 10; + s = a[140]; + n = a[139] >> 14; + for (i = 0; i < 139; i++) { + n += (s & 0x3fffff) << 8; + r[i] = n & 0x3fffff; + n >>= 22; + s = a[141 + i] + (s >> 22); } - r[135] = (sp_digit)n; + n += s << 8; + r[139] = n; #else + sp_digit n, s; int i; - int64_t n = a[135] >> 13; - n += ((int64_t)a[136]) << 10; + + s = a[140]; n = a[139] >> 14; for (i = 0; i < 136; i += 8) { - r[i + 0] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 137]) << 10; - r[i + 1] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 138]) << 10; - r[i + 2] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 139]) << 10; - r[i + 3] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 140]) << 10; - r[i + 4] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 141]) << 10; - r[i + 5] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 142]) << 10; - r[i + 6] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 143]) << 10; - r[i + 7] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 144]) << 10; + n += (s & 0x3fffff) << 8; r[i+0] = n & 0x3fffff; + n >>= 22; s = a[i+141] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+1] = n & 0x3fffff; + n >>= 22; s = a[i+142] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+2] = n & 0x3fffff; + n >>= 22; s = a[i+143] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+3] = n & 0x3fffff; + n >>= 22; s = a[i+144] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+4] = n & 0x3fffff; + n >>= 22; s = a[i+145] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+5] = n & 0x3fffff; + n >>= 22; s = a[i+146] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+6] = n & 0x3fffff; + n >>= 22; s = a[i+147] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+7] = n & 0x3fffff; + n >>= 22; s = a[i+148] 
+ (s >> 22); } - r[135] = (sp_digit)n; + n += (s & 0x3fffff) << 8; r[136] = n & 0x3fffff; + n >>= 22; s = a[277] + (s >> 22); + n += (s & 0x3fffff) << 8; r[137] = n & 0x3fffff; + n >>= 22; s = a[278] + (s >> 22); + n += (s & 0x3fffff) << 8; r[138] = n & 0x3fffff; + n >>= 22; s = a[279] + (s >> 22); + n += s << 8; r[139] = n; #endif /* WOLFSSL_SP_SMALL */ - XMEMSET(&r[136], 0, sizeof(*r) * 136); + XMEMSET(&r[140], 0, sizeof(*r) * 140); } /* Reduce the number back to 3072 bits using Montgomery reduction. @@ -5602,49 +5562,49 @@ static void sp_3072_mont_shift_136(sp_digit* r, const sp_digit* a) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static void sp_3072_mont_reduce_136(sp_digit* a, sp_digit* m, sp_digit mp) +static void sp_3072_mont_reduce_140(sp_digit* a, sp_digit* m, sp_digit mp) { int i; sp_digit mu; #ifdef WOLFSSL_SP_DH if (mp != 1) { - for (i=0; i<135; i++) { - mu = (a[i] * mp) & 0x7fffff; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; + for (i=0; i<139; i++) { + mu = (a[i] * mp) & 0x3fffff; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; } - mu = (a[i] * mp) & 0x1fffl; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + mu = (a[i] * mp) & 0x3fffl; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; } else { - for (i=0; i<135; i++) { - mu = a[i] & 0x7fffff; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; + for (i=0; i<139; i++) { + mu = a[i] & 0x3fffff; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; } - mu = a[i] & 0x1fffl; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + mu = a[i] & 0x3fffl; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; } #else - for (i=0; i<135; i++) { - mu = (a[i] * mp) & 0x7fffff; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; + for (i=0; i<139; i++) { + mu = (a[i] * mp) & 0x3fffff; 
+ sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; } - mu = (a[i] * mp) & 0x1fffl; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + mu = (a[i] * mp) & 0x3fffl; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; #endif - sp_3072_mont_shift_136(a, a); - sp_3072_cond_sub_136(a, a, m, 0 - ((a[135] >> 13) > 0)); - sp_3072_norm_136(a); + sp_3072_mont_shift_140(a, a); + sp_3072_cond_sub_140(a, a, m, 0 - ((a[139] >> 14) > 0)); + sp_3072_norm_140(a); } /* Multiply two Montogmery form numbers mod the modulus (prime). @@ -5656,11 +5616,11 @@ static void sp_3072_mont_reduce_136(sp_digit* a, sp_digit* m, sp_digit mp) * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_3072_mont_mul_136(sp_digit* r, sp_digit* a, sp_digit* b, +static void sp_3072_mont_mul_140(sp_digit* r, sp_digit* a, sp_digit* b, sp_digit* m, sp_digit mp) { - sp_3072_mul_136(r, a, b); - sp_3072_mont_reduce_136(r, m, mp); + sp_3072_mul_140(r, a, b); + sp_3072_mont_reduce_140(r, m, mp); } /* Square the Montgomery form number. (r = a * a mod m) @@ -5670,11 +5630,11 @@ static void sp_3072_mont_mul_136(sp_digit* r, sp_digit* a, sp_digit* b, * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_3072_mont_sqr_136(sp_digit* r, sp_digit* a, sp_digit* m, +static void sp_3072_mont_sqr_140(sp_digit* r, sp_digit* a, sp_digit* m, sp_digit mp) { - sp_3072_sqr_136(r, a); - sp_3072_mont_reduce_136(r, m, mp); + sp_3072_sqr_140(r, a); + sp_3072_mont_reduce_140(r, m, mp); } /* Multiply a by scalar b into r. (r = a * b) @@ -5683,7 +5643,7 @@ static void sp_3072_mont_sqr_136(sp_digit* r, sp_digit* a, sp_digit* m, * a A single precision integer. * b A scalar. 
*/ -SP_NOINLINE static void sp_3072_mul_d_272(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_d_280(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -5691,37 +5651,37 @@ SP_NOINLINE static void sp_3072_mul_d_272(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 272; i++) { + for (i = 0; i < 280; i++) { t += tb * a[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[272] = (sp_digit)t; + r[280] = (sp_digit)t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; - for (i = 0; i < 272; i += 8) { + t[0] = tb * a[0]; r[0] = t[0] & 0x3fffff; + for (i = 0; i < 280; i += 8) { t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] = (sp_digit)(t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] = (sp_digit)(t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] = (sp_digit)(t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] = (sp_digit)(t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] = (sp_digit)(t[7] >> 22) + (t[0] & 0x3fffff); } - r[272] = (sp_digit)(t[7] >> 23); + r[280] = (sp_digit)(t[7] >> 22); #endif /* WOLFSSL_SP_SMALL */ } @@ -5733,13 +5693,13 @@ SP_NOINLINE static void sp_3072_mul_d_272(sp_digit* r, const sp_digit* a, * b A single precision number to add. 
* m Mask value to apply. */ -static void sp_3072_cond_add_136(sp_digit* r, const sp_digit* a, +static void sp_3072_cond_add_140(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] + (b[i] & m); #else int i; @@ -5754,6 +5714,10 @@ static void sp_3072_cond_add_136(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + (b[i + 6] & m); r[i + 7] = a[i + 7] + (b[i + 7] & m); } + r[136] = a[136] + (b[136] & m); + r[137] = a[137] + (b[137] & m); + r[138] = a[138] + (b[138] & m); + r[139] = a[139] + (b[139] & m); #endif /* WOLFSSL_SP_SMALL */ } @@ -5764,12 +5728,12 @@ static void sp_3072_cond_add_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] - b[i]; return 0; @@ -5783,37 +5747,40 @@ SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] + b[i]; return 0; } #endif -SP_NOINLINE static void sp_3072_rshift_136(sp_digit* r, sp_digit* a, byte n) +SP_NOINLINE static void sp_3072_rshift_140(sp_digit* r, sp_digit* a, byte n) { int i; #ifdef WOLFSSL_SP_SMALL - for (i=0; i<135; i++) - r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff; + for (i=0; i<139; i++) + r[i] = ((a[i] >> n) | (a[i + 1] << (22 - n))) & 0x3fffff; #else - for (i=0; i<128; i += 8) { - r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff; - r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff; - r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff; - r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff; - r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff; - r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff; - r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff; - r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff; + for (i=0; i<136; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (22 - n))) & 0x3fffff; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (22 - n))) & 0x3fffff; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (22 - n))) & 0x3fffff; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (22 - n))) & 0x3fffff; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (22 - n))) & 0x3fffff; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (22 - n))) & 0x3fffff; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (22 - n))) & 0x3fffff; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (22 - n))) & 0x3fffff; } + r[136] = ((a[136] >> n) | (a[137] << (22 - n))) & 0x3fffff; + r[137] = ((a[137] >> n) | (a[138] << (22 - n))) & 0x3fffff; + r[138] = ((a[138] >> n) | (a[139] << (22 - n))) & 0x3fffff; #endif - r[135] = a[135] >> n; + r[139] = a[139] >> n; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -5825,7 
+5792,7 @@ SP_NOINLINE static void sp_3072_rshift_136(sp_digit* r, sp_digit* a, byte n) * r Remainder from the division. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, +static int sp_3072_div_140(sp_digit* a, sp_digit* d, sp_digit* m, sp_digit* r) { int i; @@ -5834,7 +5801,7 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) sp_digit* td; #else - sp_digit t1d[272 + 1], t2d[136 + 1], sdd[136 + 1]; + sp_digit t1d[280 + 1], t2d[140 + 1], sdd[140 + 1]; #endif sp_digit* t1; sp_digit* t2; @@ -5842,12 +5809,12 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, int err = MP_OKAY; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) - td = XMALLOC(sizeof(sp_digit) * (4 * 136 + 3), NULL, + td = XMALLOC(sizeof(sp_digit) * (4 * 140 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td != NULL) { t1 = td; - t2 = td + 272 + 1; - sd = t2 + 136 + 1; + t2 = td + 280 + 1; + sd = t2 + 140 + 1; } else err = MEMORY_E; @@ -5860,45 +5827,45 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, (void)m; if (err == MP_OKAY) { - sp_3072_mul_d_136(sd, d, 1 << 10); - sp_3072_mul_d_272(t1, a, 1 << 10); - div = sd[135]; - for (i=136; i>=0; i--) { - t1[136 + i] += t1[136 + i - 1] >> 23; - t1[136 + i - 1] &= 0x7fffff; - d1 = t1[136 + i]; - d1 <<= 23; - d1 += t1[136 + i - 1]; + sp_3072_mul_d_140(sd, d, 1 << 8); + sp_3072_mul_d_280(t1, a, 1 << 8); + div = sd[139]; + for (i=140; i>=0; i--) { + t1[140 + i] += t1[140 + i - 1] >> 22; + t1[140 + i - 1] &= 0x3fffff; + d1 = t1[140 + i]; + d1 <<= 22; + d1 += t1[140 + i - 1]; r1 = (sp_digit)(d1 / div); - sp_3072_mul_d_136(t2, sd, r1); - sp_3072_sub_136(&t1[i], &t1[i], t2); - t1[136 + i] -= t2[136]; - t1[136 + i] += t1[136 + i - 1] >> 23; - t1[136 + i - 1] &= 0x7fffff; - r1 = (((-t1[136 + i]) << 23) - t1[136 + i - 1]) / div; - r1 -= t1[136 + i]; - 
sp_3072_mul_d_136(t2, sd, r1); - sp_3072_add_136(&t1[i], &t1[i], t2); - t1[136 + i] += t1[136 + i - 1] >> 23; - t1[136 + i - 1] &= 0x7fffff; + sp_3072_mul_d_140(t2, sd, r1); + sp_3072_sub_140(&t1[i], &t1[i], t2); + t1[140 + i] -= t2[140]; + t1[140 + i] += t1[140 + i - 1] >> 22; + t1[140 + i - 1] &= 0x3fffff; + r1 = (((-t1[140 + i]) << 22) - t1[140 + i - 1]) / div; + r1 -= t1[140 + i]; + sp_3072_mul_d_140(t2, sd, r1); + sp_3072_add_140(&t1[i], &t1[i], t2); + t1[140 + i] += t1[140 + i - 1] >> 22; + t1[140 + i - 1] &= 0x3fffff; } - t1[136 - 1] += t1[136 - 2] >> 23; - t1[136 - 2] &= 0x7fffff; - d1 = t1[136 - 1]; + t1[140 - 1] += t1[140 - 2] >> 22; + t1[140 - 2] &= 0x3fffff; + d1 = t1[140 - 1]; r1 = (sp_digit)(d1 / div); - sp_3072_mul_d_136(t2, sd, r1); - sp_3072_sub_136(t1, t1, t2); - XMEMCPY(r, t1, sizeof(*r) * 2 * 136); - for (i=0; i<134; i++) { - r[i+1] += r[i] >> 23; - r[i] &= 0x7fffff; + sp_3072_mul_d_140(t2, sd, r1); + sp_3072_sub_140(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2 * 140); + for (i=0; i<138; i++) { + r[i+1] += r[i] >> 22; + r[i] &= 0x3fffff; } - sp_3072_cond_add_136(r, r, sd, 0 - (r[135] < 0)); + sp_3072_cond_add_140(r, r, sd, 0 - (r[139] < 0)); } - sp_3072_rshift_136(r, r, 10); + sp_3072_rshift_140(r, r, 8); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (td != NULL) @@ -5915,9 +5882,9 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, * m A single precision number that is the modulus to reduce with. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_3072_mod_136(sp_digit* r, sp_digit* a, sp_digit* m) +static int sp_3072_mod_140(sp_digit* r, sp_digit* a, sp_digit* m) { - return sp_3072_div_136(a, m, NULL, r); + return sp_3072_div_140(a, m, NULL, r); } #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH) @@ -5930,7 +5897,7 @@ static int sp_3072_mod_136(sp_digit* r, sp_digit* a, sp_digit* m) * m A single precision number that is the modulus. 
* returns 0 on success and MEMORY_E on dynamic memory allocation failure. */ -static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, +static int sp_3072_mod_exp_140(sp_digit* r, sp_digit* a, sp_digit* e, int bits, sp_digit* m, int reduceA) { #ifdef WOLFSSL_SP_SMALL @@ -5943,62 +5910,62 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int c, y; int err = MP_OKAY; - td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 136 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 140 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { - XMEMSET(td, 0, sizeof(*td) * 3 * 136 * 2); + XMEMSET(td, 0, sizeof(*td) * 3 * 140 * 2); norm = t[0] = td; - t[1] = &td[136 * 2]; - t[2] = &td[2 * 136 * 2]; + t[1] = &td[140 * 2]; + t[2] = &td[2 * 140 * 2]; sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); if (reduceA) - err = sp_3072_mod_136(t[1], a, m); + err = sp_3072_mod_140(t[1], a, m); else - XMEMCPY(t[1], a, sizeof(sp_digit) * 136); + XMEMCPY(t[1], a, sizeof(sp_digit) * 140); } if (err == MP_OKAY) { - sp_3072_mul_136(t[1], t[1], norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], t[1], norm); + err = sp_3072_mod_140(t[1], t[1], m); } if (err == MP_OKAY) { - i = bits / 23; - c = bits % 23; - n = e[i--] << (23 - c); + i = bits / 22; + c = bits % 22; + n = e[i--] << (22 - c); for (; ; c--) { if (c == 0) { if (i == -1) break; n = e[i--]; - c = 23; + c = 22; } - y = (n >> 22) & 1; + y = (n >> 21) & 1; n <<= 1; - sp_3072_mont_mul_136(t[y^1], t[0], t[1], m, mp); + sp_3072_mont_mul_140(t[y^1], t[0], t[1], m, mp); XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), - sizeof(*t[2]) * 136 * 2); - sp_3072_mont_sqr_136(t[2], t[2], m, mp); + sizeof(*t[2]) * 140 * 2); + sp_3072_mont_sqr_140(t[2], t[2], m, mp); XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), t[2], - sizeof(*t[2]) 
* 136 * 2); + sizeof(*t[2]) * 140 * 2); } - sp_3072_mont_reduce_136(t[0], m, mp); - n = sp_3072_cmp_136(t[0], m); - sp_3072_cond_sub_136(t[0], t[0], m, (n < 0) - 1); - XMEMCPY(r, t[0], sizeof(*r) * 136 * 2); + sp_3072_mont_reduce_140(t[0], m, mp); + n = sp_3072_cmp_140(t[0], m); + sp_3072_cond_sub_140(t[0], t[0], m, (n < 0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 140 * 2); } @@ -6008,7 +5975,7 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, return err; #elif defined(WOLFSSL_SP_CACHE_RESISTANT) #ifndef WOLFSSL_SMALL_STACK - sp_digit t[3][272]; + sp_digit t[3][280]; #else sp_digit* td; sp_digit* t[3]; @@ -6021,15 +5988,15 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int err = MP_OKAY; #ifdef WOLFSSL_SMALL_STACK - td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 136 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 140 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { t[0] = td; - t[1] = &td[136 * 2]; - t[2] = &td[2 * 136 * 2]; + t[1] = &td[140 * 2]; + t[2] = &td[2 * 140 * 2]; norm = t[0]; } #else @@ -6038,49 +6005,49 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, if (err == MP_OKAY) { sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); if (reduceA) { - err = sp_3072_mod_136(t[1], a, m); + err = sp_3072_mod_140(t[1], a, m); if (err == MP_OKAY) { - sp_3072_mul_136(t[1], t[1], norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], t[1], norm); + err = sp_3072_mod_140(t[1], t[1], m); } } else { - sp_3072_mul_136(t[1], a, norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], a, norm); + err = sp_3072_mod_140(t[1], t[1], m); } } if (err == MP_OKAY) { - i = bits / 23; - c = bits % 23; - n = e[i--] << (23 - c); + i = bits / 22; + c = bits % 22; + n = e[i--] << (22 - c); for (; ; c--) { if (c == 0) { if (i == -1) break; n = e[i--]; - c = 23; + c = 22; } - y 
= (n >> 22) & 1; + y = (n >> 21) & 1; n <<= 1; - sp_3072_mont_mul_136(t[y^1], t[0], t[1], m, mp); + sp_3072_mont_mul_140(t[y^1], t[0], t[1], m, mp); XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); - sp_3072_mont_sqr_136(t[2], t[2], m, mp); + sp_3072_mont_sqr_140(t[2], t[2], m, mp); XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); } - sp_3072_mont_reduce_136(t[0], m, mp); - n = sp_3072_cmp_136(t[0], m); - sp_3072_cond_sub_136(t[0], t[0], m, (n < 0) - 1); + sp_3072_mont_reduce_140(t[0], m, mp); + n = sp_3072_cmp_140(t[0], m); + sp_3072_cond_sub_140(t[0], t[0], m, (n < 0) - 1); XMEMCPY(r, t[0], sizeof(t[0])); } @@ -6092,13 +6059,13 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, return err; #else #ifndef WOLFSSL_SMALL_STACK - sp_digit t[32][272]; + sp_digit t[32][280]; #else sp_digit* t[32]; sp_digit* td; #endif sp_digit* norm; - sp_digit rt[272]; + sp_digit rt[280]; sp_digit mp = 1; sp_digit n; int i; @@ -6106,14 +6073,14 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int err = MP_OKAY; #ifdef WOLFSSL_SMALL_STACK - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 272, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 280, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { for (i=0; i<32; i++) - t[i] = td + i * 272; + t[i] = td + i * 280; norm = t[0]; } #else @@ -6122,67 +6089,67 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, if (err == MP_OKAY) { sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); if (reduceA) { - err = sp_3072_mod_136(t[1], a, m); + err = sp_3072_mod_140(t[1], a, m); if (err == MP_OKAY) { - sp_3072_mul_136(t[1], t[1], norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], t[1], norm); + err = sp_3072_mod_140(t[1], t[1], m); } } else { - 
sp_3072_mul_136(t[1], a, norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], a, norm); + err = sp_3072_mod_140(t[1], t[1], m); } } if (err == MP_OKAY) { - sp_3072_mont_sqr_136(t[ 2], t[ 1], m, mp); - sp_3072_mont_mul_136(t[ 3], t[ 2], t[ 1], m, mp); - sp_3072_mont_sqr_136(t[ 4], t[ 2], m, mp); - sp_3072_mont_mul_136(t[ 5], t[ 3], t[ 2], m, mp); - sp_3072_mont_sqr_136(t[ 6], t[ 3], m, mp); - sp_3072_mont_mul_136(t[ 7], t[ 4], t[ 3], m, mp); - sp_3072_mont_sqr_136(t[ 8], t[ 4], m, mp); - sp_3072_mont_mul_136(t[ 9], t[ 5], t[ 4], m, mp); - sp_3072_mont_sqr_136(t[10], t[ 5], m, mp); - sp_3072_mont_mul_136(t[11], t[ 6], t[ 5], m, mp); - sp_3072_mont_sqr_136(t[12], t[ 6], m, mp); - sp_3072_mont_mul_136(t[13], t[ 7], t[ 6], m, mp); - sp_3072_mont_sqr_136(t[14], t[ 7], m, mp); - sp_3072_mont_mul_136(t[15], t[ 8], t[ 7], m, mp); - sp_3072_mont_sqr_136(t[16], t[ 8], m, mp); - sp_3072_mont_mul_136(t[17], t[ 9], t[ 8], m, mp); - sp_3072_mont_sqr_136(t[18], t[ 9], m, mp); - sp_3072_mont_mul_136(t[19], t[10], t[ 9], m, mp); - sp_3072_mont_sqr_136(t[20], t[10], m, mp); - sp_3072_mont_mul_136(t[21], t[11], t[10], m, mp); - sp_3072_mont_sqr_136(t[22], t[11], m, mp); - sp_3072_mont_mul_136(t[23], t[12], t[11], m, mp); - sp_3072_mont_sqr_136(t[24], t[12], m, mp); - sp_3072_mont_mul_136(t[25], t[13], t[12], m, mp); - sp_3072_mont_sqr_136(t[26], t[13], m, mp); - sp_3072_mont_mul_136(t[27], t[14], t[13], m, mp); - sp_3072_mont_sqr_136(t[28], t[14], m, mp); - sp_3072_mont_mul_136(t[29], t[15], t[14], m, mp); - sp_3072_mont_sqr_136(t[30], t[15], m, mp); - sp_3072_mont_mul_136(t[31], t[16], t[15], m, mp); + sp_3072_mont_sqr_140(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_140(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_140(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_140(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_140(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_140(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_140(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_140(t[ 9], t[ 5], 
t[ 4], m, mp); + sp_3072_mont_sqr_140(t[10], t[ 5], m, mp); + sp_3072_mont_mul_140(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_140(t[12], t[ 6], m, mp); + sp_3072_mont_mul_140(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_140(t[14], t[ 7], m, mp); + sp_3072_mont_mul_140(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_140(t[16], t[ 8], m, mp); + sp_3072_mont_mul_140(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_140(t[18], t[ 9], m, mp); + sp_3072_mont_mul_140(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_140(t[20], t[10], m, mp); + sp_3072_mont_mul_140(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_140(t[22], t[11], m, mp); + sp_3072_mont_mul_140(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_140(t[24], t[12], m, mp); + sp_3072_mont_mul_140(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_140(t[26], t[13], m, mp); + sp_3072_mont_mul_140(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_140(t[28], t[14], m, mp); + sp_3072_mont_mul_140(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_140(t[30], t[15], m, mp); + sp_3072_mont_mul_140(t[31], t[16], t[15], m, mp); bits = ((bits + 4) / 5) * 5; - i = ((bits + 22) / 23) - 1; - c = bits % 23; + i = ((bits + 21) / 22) - 1; + c = bits % 22; if (c == 0) - c = 23; - if (i < 136) + c = 22; + if (i < 140) n = e[i--] << (32 - c); else { n = 0; i--; } if (c < 5) { - n |= e[i--] << (9 - c); - c += 23; + n |= e[i--] << (10 - c); + c += 22; } y = (n >> 27) & 0x1f; n <<= 5; @@ -6190,25 +6157,25 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, XMEMCPY(rt, t[y], sizeof(rt)); for (; i>=0 || c>=5; ) { if (c < 5) { - n |= e[i--] << (9 - c); - c += 23; + n |= e[i--] << (10 - c); + c += 22; } y = (n >> 27) & 0x1f; n <<= 5; c -= 5; - sp_3072_mont_sqr_136(rt, rt, m, mp); - sp_3072_mont_sqr_136(rt, rt, m, mp); - sp_3072_mont_sqr_136(rt, rt, m, mp); - sp_3072_mont_sqr_136(rt, rt, m, mp); - sp_3072_mont_sqr_136(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); + 
sp_3072_mont_sqr_140(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); - sp_3072_mont_mul_136(rt, rt, t[y], m, mp); + sp_3072_mont_mul_140(rt, rt, t[y], m, mp); } - sp_3072_mont_reduce_136(rt, m, mp); - n = sp_3072_cmp_136(rt, m); - sp_3072_cond_sub_136(rt, rt, m, (n < 0) - 1); + sp_3072_mont_reduce_140(rt, m, mp); + n = sp_3072_cmp_140(rt, m); + sp_3072_cond_sub_140(rt, rt, m, (n < 0) - 1); XMEMCPY(r, rt, sizeof(rt)); } @@ -6230,12 +6197,12 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, * a A single precision integer. * m Mask to AND against each digit. */ -static void sp_3072_mask_68(sp_digit* r, sp_digit* a, sp_digit m) +static void sp_3072_mask_70(sp_digit* r, sp_digit* a, sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i=0; i<68; i++) + for (i=0; i<70; i++) r[i] = a[i] & m; #else int i; @@ -6254,6 +6221,8 @@ static void sp_3072_mask_68(sp_digit* r, sp_digit* a, sp_digit m) r[65] = a[65] & m; r[66] = a[66] & m; r[67] = a[67] & m; + r[68] = a[68] & m; + r[69] = a[69] & m; #endif } @@ -6287,12 +6256,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, if (*outLen < 384) err = MP_TO_E; - if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 384 || + if (err == MP_OKAY && (mp_count_bits(em) > 22 || inLen > 384 || mp_count_bits(mm) != 3072)) err = MP_READ_E; if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 5, NULL, + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 140 * 5, NULL, DYNAMIC_TYPE_RSA); if (d == NULL) err = MEMORY_E; @@ -6300,12 +6269,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, if (err == MP_OKAY) { a = d; - r = a + 136 * 2; - m = r + 136 * 2; + r = a + 140 * 2; + m = r + 140 * 2; norm = r; - sp_3072_from_bin(a, 136, in, inLen); -#if DIGIT_BIT >= 23 + sp_3072_from_bin(a, 140, in, inLen); +#if DIGIT_BIT >= 22 e[0] = em->dp[0]; #else e[0] = em->dp[0]; @@ -6317,30 +6286,30 @@ int 
sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } if (err == MP_OKAY) { - sp_3072_from_mp(m, 136, mm); + sp_3072_from_mp(m, 140, mm); sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); } if (err == MP_OKAY) { - sp_3072_mul_136(a, a, norm); - err = sp_3072_mod_136(a, a, m); + sp_3072_mul_140(a, a, norm); + err = sp_3072_mod_140(a, a, m); } if (err == MP_OKAY) { - for (i=22; i>=0; i--) + for (i=21; i>=0; i--) if (e[0] >> i) break; - XMEMCPY(r, a, sizeof(sp_digit) * 136 * 2); + XMEMCPY(r, a, sizeof(sp_digit) * 140 * 2); for (i--; i>=0; i--) { - sp_3072_mont_sqr_136(r, r, m, mp); + sp_3072_mont_sqr_140(r, r, m, mp); if (((e[0] >> i) & 1) == 1) - sp_3072_mont_mul_136(r, r, a, m, mp); + sp_3072_mont_mul_140(r, r, a, m, mp); } - sp_3072_mont_reduce_136(r, m, mp); - mp = sp_3072_cmp_136(r, m); - sp_3072_cond_sub_136(r, r, m, (mp < 0) - 1); + sp_3072_mont_reduce_140(r, m, mp); + mp = sp_3072_cmp_140(r, m); + sp_3072_cond_sub_140(r, r, m, (mp < 0) - 1); sp_3072_to_bin(r, out); *outLen = 384; @@ -6352,7 +6321,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; #else #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) - sp_digit ad[272], md[136], rd[272]; + sp_digit ad[280], md[140], rd[280]; #else sp_digit* d = NULL; #endif @@ -6364,13 +6333,13 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, if (*outLen < 384) err = MP_TO_E; - if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 384 || + if (err == MP_OKAY && (mp_count_bits(em) > 22 || inLen > 384 || mp_count_bits(mm) != 3072)) err = MP_READ_E; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 5, NULL, + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 140 * 5, NULL, DYNAMIC_TYPE_RSA); if (d == NULL) err = MEMORY_E; @@ -6378,8 +6347,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* 
em, mp_int* mm, if (err == MP_OKAY) { a = d; - r = a + 136 * 2; - m = r + 136 * 2; + r = a + 140 * 2; + m = r + 140 * 2; } #else a = ad; @@ -6388,8 +6357,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, #endif if (err == MP_OKAY) { - sp_3072_from_bin(a, 136, in, inLen); -#if DIGIT_BIT >= 23 + sp_3072_from_bin(a, 140, in, inLen); +#if DIGIT_BIT >= 22 e[0] = em->dp[0]; #else e[0] = em->dp[0]; @@ -6400,16 +6369,16 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, err = MP_EXPTMOD_E; } if (err == MP_OKAY) { - sp_3072_from_mp(m, 136, mm); + sp_3072_from_mp(m, 140, mm); if (e[0] == 0x3) { if (err == MP_OKAY) { - sp_3072_sqr_136(r, a); - err = sp_3072_mod_136(r, r, m); + sp_3072_sqr_140(r, a); + err = sp_3072_mod_140(r, r, m); } if (err == MP_OKAY) { - sp_3072_mul_136(r, a, r); - err = sp_3072_mod_136(r, r, m); + sp_3072_mul_140(r, a, r); + err = sp_3072_mod_140(r, r, m); } } else { @@ -6418,28 +6387,28 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, sp_digit mp; sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); if (err == MP_OKAY) { - sp_3072_mul_136(a, a, norm); - err = sp_3072_mod_136(a, a, m); + sp_3072_mul_140(a, a, norm); + err = sp_3072_mod_140(a, a, m); } if (err == MP_OKAY) { - for (i=22; i>=0; i--) + for (i=21; i>=0; i--) if (e[0] >> i) break; - XMEMCPY(r, a, sizeof(sp_digit) * 272); + XMEMCPY(r, a, sizeof(sp_digit) * 280); for (i--; i>=0; i--) { - sp_3072_mont_sqr_136(r, r, m, mp); + sp_3072_mont_sqr_140(r, r, m, mp); if (((e[0] >> i) & 1) == 1) - sp_3072_mont_mul_136(r, r, a, m, mp); + sp_3072_mont_mul_140(r, r, a, m, mp); } - sp_3072_mont_reduce_136(r, m, mp); - mp = sp_3072_cmp_136(r, m); - sp_3072_cond_sub_136(r, r, m, (mp < 0) - 1); + sp_3072_mont_reduce_140(r, m, mp); + mp = sp_3072_cmp_140(r, m); + sp_3072_cond_sub_140(r, r, m, (mp < 0) - 1); } } } @@ -6501,20 +6470,20 @@ int sp_RsaPrivate_3072(const byte* in, word32 
inLen, mp_int* dm, err = MP_READ_E; if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 4, NULL, + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 140 * 4, NULL, DYNAMIC_TYPE_RSA); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { - a = d + 136; - m = a + 136; + a = d + 140; + m = a + 140; r = a; - sp_3072_from_bin(a, 136, in, inLen); - sp_3072_from_mp(d, 136, dm); - sp_3072_from_mp(m, 136, mm); - err = sp_3072_mod_exp_136(r, a, d, 3072, m, 0); + sp_3072_from_bin(a, 140, in, inLen); + sp_3072_from_mp(d, 140, dm); + sp_3072_from_mp(m, 140, mm); + err = sp_3072_mod_exp_140(r, a, d, 3072, m, 0); } if (err == MP_OKAY) { sp_3072_to_bin(r, out); @@ -6522,13 +6491,13 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, } if (d != NULL) { - XMEMSET(d, 0, sizeof(sp_digit) * 136); + XMEMSET(d, 0, sizeof(sp_digit) * 140); XFREE(d, NULL, DYNAMIC_TYPE_RSA); } return err; #else - sp_digit a[272], d[136], m[136]; + sp_digit a[280], d[140], m[140]; sp_digit* r = a; int err = MP_OKAY; @@ -6545,10 +6514,10 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, err = MP_READ_E; if (err == MP_OKAY) { - sp_3072_from_bin(a, 136, in, inLen); - sp_3072_from_mp(d, 136, dm); - sp_3072_from_mp(m, 136, mm); - err = sp_3072_mod_exp_136(r, a, d, 3072, m, 0); + sp_3072_from_bin(a, 140, in, inLen); + sp_3072_from_mp(d, 140, dm); + sp_3072_from_mp(m, 140, mm); + err = sp_3072_mod_exp_140(r, a, d, 3072, m, 0); } if (err == MP_OKAY) { @@ -6556,7 +6525,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, *outLen = 384; } - XMEMSET(d, 0, sizeof(sp_digit) * 136); + XMEMSET(d, 0, sizeof(sp_digit) * 140); return err; #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ @@ -6584,61 +6553,61 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, err = MP_READ_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 68 * 11, NULL, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 11, NULL, DYNAMIC_TYPE_RSA); if 
(t == NULL) err = MEMORY_E; } if (err == MP_OKAY) { a = t; - p = a + 136 * 2; - q = p + 68; - qi = dq = dp = q + 68; - tmpa = qi + 68; - tmpb = tmpa + 136; + p = a + 140 * 2; + q = p + 70; + qi = dq = dp = q + 70; + tmpa = qi + 70; + tmpb = tmpa + 140; tmp = t; - r = tmp + 136; + r = tmp + 140; - sp_3072_from_bin(a, 136, in, inLen); - sp_3072_from_mp(p, 68, pm); - sp_3072_from_mp(q, 68, qm); - sp_3072_from_mp(dp, 68, dpm); - err = sp_3072_mod_exp_68(tmpa, a, dp, 1536, p, 1); + sp_3072_from_bin(a, 140, in, inLen); + sp_3072_from_mp(p, 70, pm); + sp_3072_from_mp(q, 70, qm); + sp_3072_from_mp(dp, 70, dpm); + err = sp_3072_mod_exp_70(tmpa, a, dp, 1536, p, 1); } if (err == MP_OKAY) { - sp_3072_from_mp(dq, 68, dqm); - err = sp_3072_mod_exp_68(tmpb, a, dq, 1536, q, 1); + sp_3072_from_mp(dq, 70, dqm); + err = sp_3072_mod_exp_70(tmpb, a, dq, 1536, q, 1); } if (err == MP_OKAY) { - sp_3072_sub_68(tmpa, tmpa, tmpb); - sp_3072_mask_68(tmp, p, tmpa[67] >> 31); - sp_3072_add_68(tmpa, tmpa, tmp); + sp_3072_sub_70(tmpa, tmpa, tmpb); + sp_3072_mask_70(tmp, p, tmpa[69] >> 31); + sp_3072_add_70(tmpa, tmpa, tmp); - sp_3072_from_mp(qi, 68, qim); - sp_3072_mul_68(tmpa, tmpa, qi); - err = sp_3072_mod_68(tmpa, tmpa, p); + sp_3072_from_mp(qi, 70, qim); + sp_3072_mul_70(tmpa, tmpa, qi); + err = sp_3072_mod_70(tmpa, tmpa, p); } if (err == MP_OKAY) { - sp_3072_mul_68(tmpa, q, tmpa); - sp_3072_add_136(r, tmpb, tmpa); - sp_3072_norm_136(r); + sp_3072_mul_70(tmpa, q, tmpa); + sp_3072_add_140(r, tmpb, tmpa); + sp_3072_norm_140(r); sp_3072_to_bin(r, out); *outLen = 384; } if (t != NULL) { - XMEMSET(t, 0, sizeof(sp_digit) * 68 * 11); + XMEMSET(t, 0, sizeof(sp_digit) * 70 * 11); XFREE(t, NULL, DYNAMIC_TYPE_RSA); } return err; #else - sp_digit a[136 * 2]; - sp_digit p[68], q[68], dp[68], dq[68], qi[68]; - sp_digit tmp[136], tmpa[136], tmpb[136]; + sp_digit a[140 * 2]; + sp_digit p[70], q[70], dp[70], dq[70], qi[70]; + sp_digit tmp[140], tmpa[140], tmpb[140]; sp_digit* r = a; int err = MP_OKAY; @@ 
-6651,30 +6620,30 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, err = MP_READ_E; if (err == MP_OKAY) { - sp_3072_from_bin(a, 136, in, inLen); - sp_3072_from_mp(p, 68, pm); - sp_3072_from_mp(q, 68, qm); - sp_3072_from_mp(dp, 68, dpm); - sp_3072_from_mp(dq, 68, dqm); - sp_3072_from_mp(qi, 68, qim); + sp_3072_from_bin(a, 140, in, inLen); + sp_3072_from_mp(p, 70, pm); + sp_3072_from_mp(q, 70, qm); + sp_3072_from_mp(dp, 70, dpm); + sp_3072_from_mp(dq, 70, dqm); + sp_3072_from_mp(qi, 70, qim); - err = sp_3072_mod_exp_68(tmpa, a, dp, 1536, p, 1); + err = sp_3072_mod_exp_70(tmpa, a, dp, 1536, p, 1); } if (err == MP_OKAY) - err = sp_3072_mod_exp_68(tmpb, a, dq, 1536, q, 1); + err = sp_3072_mod_exp_70(tmpb, a, dq, 1536, q, 1); if (err == MP_OKAY) { - sp_3072_sub_68(tmpa, tmpa, tmpb); - sp_3072_mask_68(tmp, p, tmpa[67] >> 31); - sp_3072_add_68(tmpa, tmpa, tmp); - sp_3072_mul_68(tmpa, tmpa, qi); - err = sp_3072_mod_68(tmpa, tmpa, p); + sp_3072_sub_70(tmpa, tmpa, tmpb); + sp_3072_mask_70(tmp, p, tmpa[69] >> 31); + sp_3072_add_70(tmpa, tmpa, tmp); + sp_3072_mul_70(tmpa, tmpa, qi); + err = sp_3072_mod_70(tmpa, tmpa, p); } if (err == MP_OKAY) { - sp_3072_mul_68(tmpa, tmpa, q); - sp_3072_add_136(r, tmpb, tmpa); - sp_3072_norm_136(r); + sp_3072_mul_70(tmpa, tmpa, q); + sp_3072_add_140(r, tmpb, tmpa); + sp_3072_norm_140(r); sp_3072_to_bin(r, out); *outLen = 384; @@ -6707,25 +6676,25 @@ static int sp_3072_to_mp(sp_digit* a, mp_int* r) err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); if (err == MP_OKAY) { -#if DIGIT_BIT == 23 - XMEMCPY(r->dp, a, sizeof(sp_digit) * 136); - r->used = 136; +#if DIGIT_BIT == 22 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 140); + r->used = 140; mp_clamp(r); -#elif DIGIT_BIT < 23 +#elif DIGIT_BIT < 22 int i, j = 0, s = 0; r->dp[0] = 0; - for (i = 0; i < 136; i++) { + for (i = 0; i < 140; i++) { r->dp[j] |= a[i] << s; r->dp[j] &= (1l << DIGIT_BIT) - 1; s = DIGIT_BIT - s; r->dp[++j] = a[i] >> s; - while (s + DIGIT_BIT <= 23) { + while (s 
+ DIGIT_BIT <= 22) { s += DIGIT_BIT; r->dp[j] &= (1l << DIGIT_BIT) - 1; r->dp[++j] = a[i] >> s; } - s = 23 - s; + s = 22 - s; } r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; mp_clamp(r); @@ -6733,18 +6702,18 @@ static int sp_3072_to_mp(sp_digit* a, mp_int* r) int i, j = 0, s = 0; r->dp[0] = 0; - for (i = 0; i < 136; i++) { + for (i = 0; i < 140; i++) { r->dp[j] |= ((mp_digit)a[i]) << s; - if (s + 23 >= DIGIT_BIT) { + if (s + 22 >= DIGIT_BIT) { #if DIGIT_BIT < 32 r->dp[j] &= (1l << DIGIT_BIT) - 1; #endif s = DIGIT_BIT - s; r->dp[++j] = a[i] >> s; - s = 23 - s; + s = 22 - s; } else - s += 23; + s += 22; } r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; mp_clamp(r); @@ -6780,22 +6749,22 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) } if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL, DYNAMIC_TYPE_DH); + d = (sp_digit*)XMALLOC(sizeof(*d) * 140 * 4, NULL, DYNAMIC_TYPE_DH); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { b = d; - e = b + 136 * 2; - m = e + 136; + e = b + 140 * 2; + m = e + 140; r = b; - sp_3072_from_mp(b, 136, base); - sp_3072_from_mp(e, 136, exp); - sp_3072_from_mp(m, 136, mod); + sp_3072_from_mp(b, 140, base); + sp_3072_from_mp(e, 140, exp); + sp_3072_from_mp(m, 140, mod); - err = sp_3072_mod_exp_136(r, b, e, mp_count_bits(exp), m, 0); + err = sp_3072_mod_exp_140(r, b, e, mp_count_bits(exp), m, 0); } if (err == MP_OKAY) { @@ -6803,13 +6772,13 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) } if (d != NULL) { - XMEMSET(e, 0, sizeof(sp_digit) * 136); + XMEMSET(e, 0, sizeof(sp_digit) * 140); XFREE(d, NULL, DYNAMIC_TYPE_DH); } return err; #else #ifndef WOLFSSL_SMALL_STACK - sp_digit bd[272], ed[136], md[136]; + sp_digit bd[280], ed[140], md[140]; #else sp_digit* d = NULL; #endif @@ -6827,15 +6796,15 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #ifdef WOLFSSL_SMALL_STACK if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL, 
DYNAMIC_TYPE_DH); + d = (sp_digit*)XMALLOC(sizeof(*d) * 140 * 4, NULL, DYNAMIC_TYPE_DH); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { b = d; - e = b + 136 * 2; - m = e + 136; + e = b + 140 * 2; + m = e + 140; r = b; } #else @@ -6845,18 +6814,18 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #endif if (err == MP_OKAY) { - sp_3072_from_mp(b, 136, base); - sp_3072_from_mp(e, 136, exp); - sp_3072_from_mp(m, 136, mod); + sp_3072_from_mp(b, 140, base); + sp_3072_from_mp(e, 140, exp); + sp_3072_from_mp(m, 140, mod); - err = sp_3072_mod_exp_136(r, b, e, expBits, m, 0); + err = sp_3072_mod_exp_140(r, b, e, expBits, m, 0); } if (err == MP_OKAY) { err = sp_3072_to_mp(r, res); } - XMEMSET(e, 0, sizeof(sp_digit) * 136); + XMEMSET(e, 0, sizeof(sp_digit) * 140); #ifdef WOLFSSL_SMALL_STACK if (d != NULL) @@ -6897,22 +6866,22 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, } if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL, DYNAMIC_TYPE_DH); + d = (sp_digit*)XMALLOC(sizeof(*d) * 140 * 4, NULL, DYNAMIC_TYPE_DH); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { b = d; - e = b + 136 * 2; - m = e + 136; + e = b + 140 * 2; + m = e + 140; r = b; - sp_3072_from_mp(b, 136, base); - sp_3072_from_bin(e, 136, exp, expLen); - sp_3072_from_mp(m, 136, mod); + sp_3072_from_mp(b, 140, base); + sp_3072_from_bin(e, 140, exp, expLen); + sp_3072_from_mp(m, 140, mod); - err = sp_3072_mod_exp_136(r, b, e, expLen * 8, m, 0); + err = sp_3072_mod_exp_140(r, b, e, expLen * 8, m, 0); } if (err == MP_OKAY) { @@ -6925,13 +6894,13 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, } if (d != NULL) { - XMEMSET(e, 0, sizeof(sp_digit) * 136); + XMEMSET(e, 0, sizeof(sp_digit) * 140); XFREE(d, NULL, DYNAMIC_TYPE_DH); } return err; #else #ifndef WOLFSSL_SMALL_STACK - sp_digit bd[272], ed[136], md[136]; + sp_digit bd[280], ed[140], md[140]; #else sp_digit* d = NULL; #endif @@ -6949,15 +6918,15 @@ int 
sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, #ifdef WOLFSSL_SMALL_STACK if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL, DYNAMIC_TYPE_DH); + d = (sp_digit*)XMALLOC(sizeof(*d) * 140 * 4, NULL, DYNAMIC_TYPE_DH); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { b = d; - e = b + 136 * 2; - m = e + 136; + e = b + 140 * 2; + m = e + 140; r = b; } #else @@ -6967,11 +6936,11 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, #endif if (err == MP_OKAY) { - sp_3072_from_mp(b, 136, base); - sp_3072_from_bin(e, 136, exp, expLen); - sp_3072_from_mp(m, 136, mod); + sp_3072_from_mp(b, 140, base); + sp_3072_from_bin(e, 140, exp, expLen); + sp_3072_from_mp(m, 140, mod); - err = sp_3072_mod_exp_136(r, b, e, expLen * 8, m, 0); + err = sp_3072_mod_exp_140(r, b, e, expLen * 8, m, 0); } if (err == MP_OKAY) { @@ -6983,7 +6952,7 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } - XMEMSET(e, 0, sizeof(sp_digit) * 136); + XMEMSET(e, 0, sizeof(sp_digit) * 140); #ifdef WOLFSSL_SMALL_STACK if (d != NULL) diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index 599e57240..a5af9d63c 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -2399,10 +2399,10 @@ void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c) /* Use Duff's device to unroll the loop. */ int idx = (c - 1) & ~3; switch (c % 4) { - case 0: do { pd[idx+0] = *b++; - case 3: pd[idx+1] = *b++; - case 2: pd[idx+2] = *b++; - case 1: pd[idx+3] = *b++; + case 0: do { pd[idx+0] = *b++; // fallthrough + case 3: pd[idx+1] = *b++; // fallthrough + case 2: pd[idx+2] = *b++; // fallthrough + case 1: pd[idx+3] = *b++; // fallthrough idx -= 4; } while ((c -= 4) > 0); }