Merge pull request #2129 from SparkiDev/sp_prime

Improve SP code and support prime check using SP in tfm.c
2026-05-04 16:44:14 +02:00 · 2019-03-11 13:53:57 -07:00
parent b8035371f4 5083330b86
commit ab0beb354b
11 changed files with 3881 additions and 3592 deletions
@@ -74,6 +74,17 @@
    #endif
 #endif

+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res); /* res = base^exp mod mod, mod exactly 1024 bits */
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res); /* res = base^exp mod mod, mod exactly 1536 bits */
+WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res); /* presumably the 2048-bit counterpart - confirm */
+WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res); /* presumably the 3072-bit counterpart - confirm */
+#endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
+
 /* reverse an array, used for radix code */
 static void
 bn_reverse (unsigned char *s, int len)
@@ -4425,9 +4436,16 @@ static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
  if ((err = mp_init (&y)) != MP_OKAY) {
    goto LBL_R;
  }
-  if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
-    goto LBL_Y;
-  }
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+  if (mp_count_bits(a) == 1024)
+      err = sp_ModExp_1024(b, &r, a, &y);
+  else if (mp_count_bits(a) == 2048)
+      err = sp_ModExp_2048(b, &r, a, &y);
+  else
+#endif
+      err = mp_exptmod (b, &r, a, &y);
+  if (err != MP_OKAY)
+      goto LBL_Y;

  /* if y != 1 and y != n1 do */
  if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
@@ -4920,7 +4920,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
    return err;
 }
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 /* Convert an array of sp_digit to an mp_int.
 *
 * a  A single precision integer.
@@ -5065,7 +5065,48 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
    return err;
 }

-#endif /* WOLFSSL_HAVE_SP_DH */
+/* Perform a modular exponentiation with a 1024-bit modulus.
+ * (res = base ^ exp mod mod)
+ *
+ * base  Base. MP integer. At most 1024 bits.
+ * exp   Exponent. MP integer. At most 1024 bits.
+ * mod   Modulus. MP integer. Must be exactly 1024 bits.
+ * res   Result. MP integer. Only valid when MP_OKAY is returned.
+ * returns MP_OKAY on success, MP_READ_E when base or exp is longer than
+ * 1024 bits or mod is not exactly 1024 bits, otherwise the error returned by
+ * the exponentiation or by the conversion of the result.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[32], e[16], m[16];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024 || expBits > 1024 ||
+                                                   mp_count_bits(mod) != 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 16, base);
+        sp_2048_from_mp(e, 16, exp);
+        sp_2048_from_mp(m, 16, mod);
+
+        err = sp_2048_mod_exp_16(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* r aliases b, which is twice as long as the modulus. Clear the upper
+         * half before converting (presumably sp_2048_to_mp reads the whole
+         * buffer - confirm). */
+        XMEMSET(r + 16, 0, sizeof(*r) * 16);
+        err = sp_2048_to_mp(r, res);
+    }
+
+    if (err == MP_OKAY) {
+        /* Only adjust res once the conversion has succeeded; on failure res
+         * may not hold mod->used digits. */
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    /* Clear the local copy of the exponent on every path. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */

 #endif /* WOLFSSL_SP_NO_2048 */

@@ -11913,7 +11954,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
    return err;
 }
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 /* Convert an array of sp_digit to an mp_int.
 *
 * a  A single precision integer.
@@ -12058,7 +12099,48 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
    return err;
 }

-#endif /* WOLFSSL_HAVE_SP_DH */
+/* Perform a modular exponentiation with a 1536-bit modulus.
+ * (res = base ^ exp mod mod)
+ *
+ * base  Base. MP integer. At most 1536 bits.
+ * exp   Exponent. MP integer. At most 1536 bits.
+ * mod   Modulus. MP integer. Must be exactly 1536 bits.
+ * res   Result. MP integer. Only valid when MP_OKAY is returned.
+ * returns MP_OKAY on success, MP_READ_E when base or exp is longer than
+ * 1536 bits or mod is not exactly 1536 bits, otherwise the error returned by
+ * the exponentiation or by the conversion of the result.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[48], e[24], m[24];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536 || expBits > 1536 ||
+                                                   mp_count_bits(mod) != 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 24, base);
+        sp_3072_from_mp(e, 24, exp);
+        sp_3072_from_mp(m, 24, mod);
+
+        err = sp_3072_mod_exp_24(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* r aliases b, which is twice as long as the modulus. Clear the upper
+         * half before converting (presumably sp_3072_to_mp reads the whole
+         * buffer - confirm). */
+        XMEMSET(r + 24, 0, sizeof(*r) * 24);
+        err = sp_3072_to_mp(r, res);
+    }
+
+    if (err == MP_OKAY) {
+        /* Only adjust res once the conversion has succeeded; on failure res
+         * may not hold mod->used digits. */
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    /* Clear the local copy of the exponent on every path. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */

 #endif /* WOLFSSL_SP_NO_3072 */

@@ -27739,7 +27821,7 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
    return err;
 }

-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
 /* Returns 1 if the number of zero.
 * Implementation is constant time.
 *
@@ -27751,7 +27833,7 @@ static int sp_256_iszero_4(const sp_digit* a)
    return (a[0] | a[1] | a[2] | a[3]) == 0;
 }

-#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
 /* Add 1 to a. (a = a + 1)
 *
 * a  A single precision integer.
@@ -28814,10 +28896,11 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
            hashLen = 32;

        sp_256_from_bin(e, 4, hash, hashLen);
-        sp_256_from_mp(x, 4, priv);
    }

    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 4, priv);
+
        /* New random point. */
        err = sp_256_ecc_gen_k_4(rng, k);
        if (err == MP_OKAY) {
@@ -2967,7 +2967,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,

 #endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 /* Convert an array of sp_digit to an mp_int.
 *
 * a  A single precision integer.
@@ -3266,7 +3266,122 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
 #endif
 }

-#endif /* WOLFSSL_HAVE_SP_DH */
+/* Perform a modular exponentiation with a 1024-bit modulus.
+ * (res = base ^ exp mod mod)
+ *
+ * base  Base. MP integer. At most 1024 bits.
+ * exp   Exponent. MP integer. At most 1024 bits.
+ * mod   Modulus. MP integer. Must be exactly 1024 bits.
+ * res   Result. MP integer.
+ * returns MP_OKAY on success, MP_READ_E when base or exp is longer than
+ * 1024 bits or mod is not exactly 1024 bits, and MEMORY_E if memory
+ * allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024 || expBits > 1024 ||
+                                                   mp_count_bits(mod) != 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 18 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* One allocation holds everything: b/r (2 * 18 digits), then e and
+         * m (18 digits each). */
+        b = d;
+        e = b + 18 * 2;
+        m = e + 18;
+        r = b;
+
+        sp_2048_from_mp(b, 18, base);
+        sp_2048_from_mp(e, 18, exp);
+        sp_2048_from_mp(m, 18, mod);
+
+        err = sp_2048_mod_exp_18(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the upper half of the double-length result buffer before
+         * converting it. */
+        XMEMSET(r + 18, 0, sizeof(*r) * 18);
+        err = sp_2048_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        /* e is only set once d has been allocated. Clear the copy of the
+         * exponent before releasing the memory. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 18);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[36], ed[18], md[18];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024 || expBits > 1024 ||
+                                                   mp_count_bits(mod) != 1024) {
+        err = MP_READ_E;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 18 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* One allocation holds everything: b/r (2 * 18 digits), then e and
+         * m (18 digits each). */
+        b = d;
+        e = b + 18 * 2;
+        m = e + 18;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 18, base);
+        sp_2048_from_mp(e, 18, exp);
+        sp_2048_from_mp(m, 18, mod);
+
+        err = sp_2048_mod_exp_18(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the upper half of the double-length result buffer before
+         * converting it. */
+        XMEMSET(r + 18, 0, sizeof(*r) * 18);
+        err = sp_2048_to_mp(r, res);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* e only points into d after a successful allocation; on the MP_READ_E
+     * and MEMORY_E paths it is uninitialized and must not be written. */
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 18);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    /* Clear the stack copy of the exponent on every path. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 18);
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */

 #endif /* WOLFSSL_SP_NO_2048 */

@@ -6364,7 +6479,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,

 #endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 /* Convert an array of sp_digit to an mp_int.
 *
 * a  A single precision integer.
@@ -6663,7 +6778,122 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
 #endif
 }

-#endif /* WOLFSSL_HAVE_SP_DH */
+/* Perform a modular exponentiation with a 1536-bit modulus.
+ * (res = base ^ exp mod mod)
+ *
+ * base  Base. MP integer. At most 1536 bits.
+ * exp   Exponent. MP integer. At most 1536 bits.
+ * mod   Modulus. MP integer. Must be exactly 1536 bits.
+ * res   Result. MP integer.
+ * returns MP_OKAY on success, MP_READ_E when base or exp is longer than
+ * 1536 bits or mod is not exactly 1536 bits, and MEMORY_E if memory
+ * allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+#ifdef WOLFSSL_SP_SMALL
+    int err = MP_OKAY;
+    sp_digit* d = NULL;
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536 || expBits > 1536 ||
+                                                   mp_count_bits(mod) != 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 27 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* One allocation holds everything: b/r (2 * 27 digits), then e and
+         * m (27 digits each). */
+        b = d;
+        e = b + 27 * 2;
+        m = e + 27;
+        r = b;
+
+        sp_3072_from_mp(b, 27, base);
+        sp_3072_from_mp(e, 27, exp);
+        sp_3072_from_mp(m, 27, mod);
+
+        err = sp_3072_mod_exp_27(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the upper half of the double-length result buffer before
+         * converting it. */
+        XMEMSET(r + 27, 0, sizeof(*r) * 27);
+        err = sp_3072_to_mp(r, res);
+    }
+
+    if (d != NULL) {
+        /* e is only set once d has been allocated. Clear the copy of the
+         * exponent before releasing the memory. */
+        XMEMSET(e, 0, sizeof(sp_digit) * 27);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+    return err;
+#else
+#ifndef WOLFSSL_SMALL_STACK
+    sp_digit bd[54], ed[27], md[27];
+#else
+    sp_digit* d = NULL;
+#endif
+    sp_digit* b;
+    sp_digit* e;
+    sp_digit* m;
+    sp_digit* r;
+    int err = MP_OKAY;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536 || expBits > 1536 ||
+                                                   mp_count_bits(mod) != 1536) {
+        err = MP_READ_E;
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    if (err == MP_OKAY) {
+        d = (sp_digit*)XMALLOC(sizeof(*d) * 27 * 4, NULL, DYNAMIC_TYPE_DH);
+        if (d == NULL)
+            err = MEMORY_E;
+    }
+
+    if (err == MP_OKAY) {
+        /* One allocation holds everything: b/r (2 * 27 digits), then e and
+         * m (27 digits each). */
+        b = d;
+        e = b + 27 * 2;
+        m = e + 27;
+        r = b;
+    }
+#else
+    r = b = bd;
+    e = ed;
+    m = md;
+#endif
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 27, base);
+        sp_3072_from_mp(e, 27, exp);
+        sp_3072_from_mp(m, 27, mod);
+
+        err = sp_3072_mod_exp_27(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* Clear the upper half of the double-length result buffer before
+         * converting it. */
+        XMEMSET(r + 27, 0, sizeof(*r) * 27);
+        err = sp_3072_to_mp(r, res);
+    }
+
+#ifdef WOLFSSL_SMALL_STACK
+    /* e only points into d after a successful allocation; on the MP_READ_E
+     * and MEMORY_E paths it is uninitialized and must not be written. */
+    if (d != NULL) {
+        XMEMSET(e, 0, sizeof(sp_digit) * 27);
+        XFREE(d, NULL, DYNAMIC_TYPE_DH);
+    }
+#else
+    /* Clear the stack copy of the exponent on every path. */
+    XMEMSET(e, 0, sizeof(sp_digit) * 27);
+#endif
+
+    return err;
+#endif
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */

 #endif /* WOLFSSL_SP_NO_3072 */

@@ -10395,7 +10625,7 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
    return err;
 }

-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
 /* Returns 1 if the number of zero.
 * Implementation is constant time.
 *
@@ -10407,7 +10637,7 @@ static int sp_256_iszero_5(const sp_digit* a)
    return (a[0] | a[1] | a[2] | a[3] | a[4]) == 0;
 }

-#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
 /* Add 1 to a. (a = a + 1)
 *
 * r  A single precision integer.
@@ -11011,10 +11241,11 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
            hashLen = 32;

        sp_256_from_bin(e, 5, hash, hashLen);
-        sp_256_from_mp(x, 5, priv);
    }

    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 5, priv);
+
        /* New random point. */
        err = sp_256_ecc_gen_k_5(rng, k);
        if (err == MP_OKAY) {
@@ -684,6 +684,7 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
        "str	r4, [r8, 20]\n\t"
        "str	r5, [r8, 24]\n\t"
        "str	r6, [r8, 28]\n\t"
+        "mov	%[r], r8\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8"
@@ -1072,6 +1073,7 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
        "str	r4, [r8, 20]\n\t"
        "str	r5, [r8, 24]\n\t"
        "str	r6, [r8, 28]\n\t"
+        "mov	%[r], r8\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8"
@@ -2695,7 +2697,8 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
 }

 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+       !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* AND m into each word of a and store in r.
 *
@@ -2961,7 +2964,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
 }

 #endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */

 /* Caclulate the bottom digit of -1/a mod 2^n.
 *
@@ -2982,7 +2985,51 @@ static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
    *rho = -x;
 }

-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * The loop walks a one 4-byte word at a time and stops when the read pointer
+ * reaches a + 256 bytes, i.e. after 64 words. The running carry is kept in
+ * r3/r4/r5. After the loop the remaining carry word is stored too, so 65
+ * words of r are written.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t" /* r6 = 256: byte length of the 64 words of a */
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t" /* keep the output pointer in r8 */
+        "mov	r9, r6\n\t" /* r9 = end of a, the loop bound */
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t" /* %[r] doubles as a zero for the adc below */
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r7, r6, %[b]\n\t"
+        "adds	r3, r6\n\t"
+        "adcs	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t" /* restore the output pointer */
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t" /* shift the carry words down for the next word */
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t" /* final carry word */
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+       !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 /* r = 2^n mod m where n is the number of bits to reduce by.
 * Given m must be 2048 bits, just need to subtract.
 *
@@ -3619,7 +3666,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e,
 }
 #endif /* WOLFSSL_SP_SMALL */

-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */

 #ifdef WOLFSSL_HAVE_SP_DH
 /* r = 2^n mod m where n is the number of bits to reduce by.
@@ -3798,49 +3845,6 @@ static void sp_2048_mont_sqr_64(sp_digit* r, sp_digit* a, sp_digit* m,
    sp_2048_mont_reduce_64(r, m, mp);
 }

-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "mov	r6, #1\n\t"
-        "lsl	r6, r6, #8\n\t"
-        "add	r6, %[a]\n\t"
-        "mov	r8, %[r]\n\t"
-        "mov	r9, r6\n\t"
-        "mov	r3, #0\n\t"
-        "mov	r4, #0\n\t"
-        "1:\n\t"
-        "mov	%[r], #0\n\t"
-        "mov	r5, #0\n\t"
-        "# A[] * B\n\t"
-        "ldr	r6, [%[a]]\n\t"
-        "umull	r6, r7, r6, %[b]\n\t"
-        "adds	r3, r6\n\t"
-        "adcs	r4, r7\n\t"
-        "adc	r5, %[r]\n\t"
-        "# A[] * B - Done\n\t"
-        "mov	%[r], r8\n\t"
-        "str	r3, [%[r]]\n\t"
-        "mov	r3, r4\n\t"
-        "mov	r4, r5\n\t"
-        "add	%[r], #4\n\t"
-        "add	%[a], #4\n\t"
-        "mov	r8, %[r]\n\t"
-        "cmp	%[a], r9\n\t"
-        "blt	1b\n\t"
-        "str	r3, [%[r]]\n\t"
-        : [r] "+r" (r), [a] "+r" (a)
-        : [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
-    );
-}
-
 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
 *
 * d1   The high order half of the number to divide.
@@ -4596,7 +4600,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
    return err;
 }
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 /* Convert an array of sp_digit to an mp_int.
 *
 * a  A single precision integer.
@@ -4741,7 +4745,48 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
    return err;
 }

-#endif /* WOLFSSL_HAVE_SP_DH */
+/* Perform a modular exponentiation with a 1024-bit modulus.
+ * (res = base ^ exp mod mod)
+ *
+ * base  Base. MP integer. At most 1024 bits.
+ * exp   Exponent. MP integer. At most 1024 bits.
+ * mod   Modulus. MP integer. Must be exactly 1024 bits.
+ * res   Result. MP integer. Only valid when MP_OKAY is returned.
+ * returns MP_OKAY on success, MP_READ_E when base or exp is longer than
+ * 1024 bits or mod is not exactly 1024 bits, otherwise the error returned by
+ * the exponentiation or by the conversion of the result.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[64], e[32], m[32];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1024 || expBits > 1024 ||
+                                                   mp_count_bits(mod) != 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 32, base);
+        sp_2048_from_mp(e, 32, exp);
+        sp_2048_from_mp(m, 32, mod);
+
+        err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* r aliases b, which is twice as long as the modulus. Clear the upper
+         * half before converting (presumably sp_2048_to_mp reads the whole
+         * buffer - confirm). */
+        XMEMSET(r + 32, 0, sizeof(*r) * 32);
+        err = sp_2048_to_mp(r, res);
+    }
+
+    if (err == MP_OKAY) {
+        /* Only adjust res once the conversion has succeeded; on failure res
+         * may not hold mod->used digits. */
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    /* Clear the local copy of the exponent on every path. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */

 #endif /* WOLFSSL_SP_NO_2048 */

@@ -5379,6 +5424,7 @@ SP_NOINLINE static void sp_3072_mul_8(sp_digit* r, const sp_digit* a,
        "str	r4, [r8, 20]\n\t"
        "str	r5, [r8, 24]\n\t"
        "str	r6, [r8, 28]\n\t"
+        "mov	%[r], r8\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8"
@@ -5767,6 +5813,7 @@ SP_NOINLINE static void sp_3072_sqr_8(sp_digit* r, const sp_digit* a)
        "str	r4, [r8, 20]\n\t"
        "str	r5, [r8, 24]\n\t"
        "str	r6, [r8, 28]\n\t"
+        "mov	%[r], r8\n\t"
        :
        : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8"
@@ -7890,7 +7937,8 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
 }

 #endif /* WOLFSSL_SP_SMALL */
-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+       !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* AND m into each word of a and store in r.
 *
@@ -8126,7 +8174,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
 }

 #endif /* WOLFSSL_SP_SMALL */
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */

 /* Caclulate the bottom digit of -1/a mod 2^n.
 *
@@ -8147,7 +8195,52 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
    *rho = -x;
 }

-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+/* Mul a by digit b into r. (r = a * b)
+ *
+ * The loop walks a one 4-byte word at a time and stops when the read pointer
+ * reaches a + 384 bytes (256 + 128), i.e. after 96 words. The running carry
+ * is kept in r3/r4/r5. After the loop the remaining carry word is stored too,
+ * so 97 words of r are written.
+ *
+ * r  A single precision integer.
+ * a  A single precision integer.
+ * b  A single precision digit.
+ */
+SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
+        const sp_digit b)
+{
+    __asm__ __volatile__ (
+        "mov	r6, #1\n\t"
+        "lsl	r6, r6, #8\n\t"
+        "add	r6, #128\n\t" /* r6 = 384: byte length of the 96 words of a */
+        "add	r6, %[a]\n\t"
+        "mov	r8, %[r]\n\t" /* keep the output pointer in r8 */
+        "mov	r9, r6\n\t" /* r9 = end of a, the loop bound */
+        "mov	r3, #0\n\t"
+        "mov	r4, #0\n\t"
+        "1:\n\t"
+        "mov	%[r], #0\n\t" /* %[r] doubles as a zero for the adc below */
+        "mov	r5, #0\n\t"
+        "# A[] * B\n\t"
+        "ldr	r6, [%[a]]\n\t"
+        "umull	r6, r7, r6, %[b]\n\t"
+        "adds	r3, r6\n\t"
+        "adcs	r4, r7\n\t"
+        "adc	r5, %[r]\n\t"
+        "# A[] * B - Done\n\t"
+        "mov	%[r], r8\n\t" /* restore the output pointer */
+        "str	r3, [%[r]]\n\t"
+        "mov	r3, r4\n\t" /* shift the carry words down for the next word */
+        "mov	r4, r5\n\t"
+        "add	%[r], #4\n\t"
+        "add	%[a], #4\n\t"
+        "mov	r8, %[r]\n\t"
+        "cmp	%[a], r9\n\t"
+        "blt	1b\n\t"
+        "str	r3, [%[r]]\n\t" /* final carry word */
+        : [r] "+r" (r), [a] "+r" (a)
+        : [b] "r" (b)
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
+    );
+}
+
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+       !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 #ifdef WOLFSSL_SP_SMALL
 /* Sub b from a into a. (a -= b)
 *
@@ -9041,7 +9134,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e,
 }
 #endif /* WOLFSSL_SP_SMALL */

-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */

 #ifdef WOLFSSL_HAVE_SP_DH
 /* r = 2^n mod m where n is the number of bits to reduce by.
@@ -9224,50 +9317,6 @@ static void sp_3072_mont_sqr_96(sp_digit* r, sp_digit* a, sp_digit* m,
    sp_3072_mont_reduce_96(r, m, mp);
 }

-/* Mul a by digit b into r. (r = a * b)
- *
- * r  A single precision integer.
- * a  A single precision integer.
- * b  A single precision digit.
- */
-SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
-        const sp_digit b)
-{
-    __asm__ __volatile__ (
-        "mov	r6, #1\n\t"
-        "lsl	r6, r6, #8\n\t"
-        "add	r6, #128\n\t"
-        "add	r6, %[a]\n\t"
-        "mov	r8, %[r]\n\t"
-        "mov	r9, r6\n\t"
-        "mov	r3, #0\n\t"
-        "mov	r4, #0\n\t"
-        "1:\n\t"
-        "mov	%[r], #0\n\t"
-        "mov	r5, #0\n\t"
-        "# A[] * B\n\t"
-        "ldr	r6, [%[a]]\n\t"
-        "umull	r6, r7, r6, %[b]\n\t"
-        "adds	r3, r6\n\t"
-        "adcs	r4, r7\n\t"
-        "adc	r5, %[r]\n\t"
-        "# A[] * B - Done\n\t"
-        "mov	%[r], r8\n\t"
-        "str	r3, [%[r]]\n\t"
-        "mov	r3, r4\n\t"
-        "mov	r4, r5\n\t"
-        "add	%[r], #4\n\t"
-        "add	%[a], #4\n\t"
-        "mov	r8, %[r]\n\t"
-        "cmp	%[a], r9\n\t"
-        "blt	1b\n\t"
-        "str	r3, [%[r]]\n\t"
-        : [r] "+r" (r), [a] "+r" (a)
-        : [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"
-    );
-}
-
 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
 *
 * d1   The high order half of the number to divide.
@@ -10025,7 +10074,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
    return err;
 }
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 /* Convert an array of sp_digit to an mp_int.
 *
 * a  A single precision integer.
@@ -10170,7 +10219,48 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,
    return err;
 }

-#endif /* WOLFSSL_HAVE_SP_DH */
+/* Perform a modular exponentiation with a 1536-bit modulus.
+ * (res = base ^ exp mod mod)
+ *
+ * base  Base. MP integer. At most 1536 bits.
+ * exp   Exponent. MP integer. At most 1536 bits.
+ * mod   Modulus. MP integer. Must be exactly 1536 bits.
+ * res   Result. MP integer. Only valid when MP_OKAY is returned.
+ * returns MP_OKAY on success, MP_READ_E when base or exp is longer than
+ * 1536 bits or mod is not exactly 1536 bits, otherwise the error returned by
+ * the exponentiation or by the conversion of the result.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[96], e[48], m[48];
+    sp_digit* r = b;
+    int expBits = mp_count_bits(exp);
+
+    if (mp_count_bits(base) > 1536 || expBits > 1536 ||
+                                                   mp_count_bits(mod) != 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 48, base);
+        sp_3072_from_mp(e, 48, exp);
+        sp_3072_from_mp(m, 48, mod);
+
+        err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        /* r aliases b, which is twice as long as the modulus. Clear the upper
+         * half before converting (presumably sp_3072_to_mp reads the whole
+         * buffer - confirm). */
+        XMEMSET(r + 48, 0, sizeof(*r) * 48);
+        err = sp_3072_to_mp(r, res);
+    }
+
+    if (err == MP_OKAY) {
+        /* Only adjust res once the conversion has succeeded; on failure res
+         * may not hold mod->used digits. */
+        res->used = mod->used;
+        mp_clamp(res);
+    }
+
+    /* Clear the local copy of the exponent on every path. */
+    XMEMSET(e, 0, sizeof(e));
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */

 #endif /* WOLFSSL_SP_NO_3072 */

@@ -10286,7 +10376,7 @@ static sp_digit p256_b[8] = {
 * a  The number to convert.
 * m  The modulus (prime).
 */
-int sp_256_mod_mul_norm_8(sp_digit* r, sp_digit* a, sp_digit* m)
+static int sp_256_mod_mul_norm_8(sp_digit* r, sp_digit* a, sp_digit* m)
 {
    int64_t t[8];
    int64_t a64[8];
@@ -10356,7 +10446,7 @@ int sp_256_mod_mul_norm_8(sp_digit* r, sp_digit* a, sp_digit* m)
 * r  A single precision integer.
 * a  A multi-precision integer.
 */
-void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
+static void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
 {
 #if DIGIT_BIT == 32
    int j;
@@ -10425,7 +10515,7 @@ void sp_256_from_mp(sp_digit* r, int max, mp_int* a)
 * p   Point of type sp_point (result).
 * pm  Point of type ecc_point.
 */
-void sp_256_point_from_ecc_point_8(sp_point* p, ecc_point* pm)
+static void sp_256_point_from_ecc_point_8(sp_point* p, ecc_point* pm)
 {
    XMEMSET(p->x, 0, sizeof(p->x));
    XMEMSET(p->y, 0, sizeof(p->y));
@@ -10441,7 +10531,7 @@ void sp_256_point_from_ecc_point_8(sp_point* p, ecc_point* pm)
 * a  A single precision integer.
 * r  A multi-precision integer.
 */
-int sp_256_to_mp(sp_digit* a, mp_int* r)
+static int sp_256_to_mp(sp_digit* a, mp_int* r)
 {
    int err;

@@ -10501,7 +10591,7 @@ int sp_256_to_mp(sp_digit* a, mp_int* r)
 * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
 * MP_OKAY.
 */
-int sp_256_point_to_ecc_point_8(sp_point* p, ecc_point* pm)
+static int sp_256_point_to_ecc_point_8(sp_point* p, ecc_point* pm)
 {
    int err;

@@ -10521,7 +10611,7 @@ int sp_256_point_to_ecc_point_8(sp_point* p, ecc_point* pm)
 * return -ve, 0 or +ve if a is less than, equal to or greater than b
 * respectively.
 */
-int32_t sp_256_cmp_8(sp_digit* a, sp_digit* b)
+SP_NOINLINE static int32_t sp_256_cmp_8(sp_digit* a, sp_digit* b)
 {
    sp_digit r = -1;

@@ -10574,7 +10664,7 @@ int32_t sp_256_cmp_8(sp_digit* a, sp_digit* b)
 * b  A single precision number to subtract.
 * m  Mask value to apply.
 */
-sp_digit sp_256_cond_sub_8(sp_digit* r, sp_digit* a,
+SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, sp_digit* a,
        sp_digit* b, sp_digit m)
 {
    sp_digit c = 0;
@@ -10609,7 +10699,7 @@ sp_digit sp_256_cond_sub_8(sp_digit* r, sp_digit* a,
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
-void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m,
        sp_digit mp)
 {
    (void)mp;
@@ -10729,7 +10819,7 @@ void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m,
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
-void sp_256_mont_reduce_order_8(sp_digit* a, sp_digit* m,
+SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, sp_digit* m,
        sp_digit mp)
 {
    sp_digit ca = 0;
@@ -10821,7 +10911,7 @@ void sp_256_mont_reduce_order_8(sp_digit* a, sp_digit* m,
 * a  A single precision integer.
 * b  A single precision integer.
 */
-void sp_256_mul_8(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
 {
    sp_digit tmp[8];
@@ -11335,7 +11425,7 @@ void sp_256_mul_8(sp_digit* r, const sp_digit* a,
 * m   Modulus (prime).
 * mp  Montogmery mulitplier.
 */
-void sp_256_mont_mul_8(sp_digit* r, sp_digit* a, sp_digit* b,
+static void sp_256_mont_mul_8(sp_digit* r, sp_digit* a, sp_digit* b,
        sp_digit* m, sp_digit mp)
 {
    sp_256_mul_8(r, a, b);
@@ -11347,7 +11437,7 @@ void sp_256_mont_mul_8(sp_digit* r, sp_digit* a, sp_digit* b,
 * r  A single precision integer.
 * a  A single precision integer.
 */
-void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
+SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
 {
    sp_digit tmp[8];
    __asm__ __volatile__ (
@@ -11738,7 +11828,7 @@ void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
 * m   Modulus (prime).
 * mp  Montogmery mulitplier.
 */
-void sp_256_mont_sqr_8(sp_digit* r, sp_digit* a, sp_digit* m,
+static void sp_256_mont_sqr_8(sp_digit* r, sp_digit* a, sp_digit* m,
        sp_digit mp)
 {
    sp_256_sqr_8(r, a);
@@ -11754,7 +11844,7 @@ void sp_256_mont_sqr_8(sp_digit* r, sp_digit* a, sp_digit* m,
 * m   Modulus (prime).
 * mp  Montogmery mulitplier.
 */
-void sp_256_mont_sqr_n_8(sp_digit* r, sp_digit* a, int n,
+static void sp_256_mont_sqr_n_8(sp_digit* r, sp_digit* a, int n,
        sp_digit* m, sp_digit mp)
 {
    sp_256_mont_sqr_8(r, a, m, mp);
@@ -11777,7 +11867,7 @@ static const uint32_t p256_mod_2[8] = {
 * a   Number to invert.
 * td  Temporary data.
 */
-void sp_256_mont_inv_8(sp_digit* r, sp_digit* a, sp_digit* td)
+static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a, sp_digit* td)
 {
 #ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
@@ -11850,7 +11940,7 @@ void sp_256_mont_inv_8(sp_digit* r, sp_digit* a, sp_digit* td)
 * p  Montgomery form projective co-ordinate point.
 * t  Temporary ordinate data.
 */
-void sp_256_map_8(sp_point* r, sp_point* p, sp_digit* t)
+static void sp_256_map_8(sp_point* r, sp_point* p, sp_digit* t)
 {
    sp_digit* t1 = t;
    sp_digit* t2 = t + 2*8;
@@ -11929,7 +12019,7 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
 * a  A single precision integer.
 * b  A single precision integer.
 */
-sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
 {
    sp_digit c = 0;
@@ -11985,7 +12075,7 @@ sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
 * b   Second number to add in Montogmery form.
 * m   Modulus (prime).
 */
-void sp_256_mont_add_8(sp_digit* r, sp_digit* a, sp_digit* b,
+SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, sp_digit* a, sp_digit* b,
        sp_digit* m)
 {
    (void)m;
@@ -12065,7 +12155,7 @@ void sp_256_mont_add_8(sp_digit* r, sp_digit* a, sp_digit* b,
 * a   Number to double in Montogmery form.
 * m   Modulus (prime).
 */
-void sp_256_mont_dbl_8(sp_digit* r, sp_digit* a, sp_digit* m)
+SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, sp_digit* a, sp_digit* m)
 {
    (void)m;

@@ -12136,7 +12226,7 @@ void sp_256_mont_dbl_8(sp_digit* r, sp_digit* a, sp_digit* m)
 * a   Number to triple in Montogmery form.
 * m   Modulus (prime).
 */
-void sp_256_mont_tpl_8(sp_digit* r, sp_digit* a, sp_digit* m)
+SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, sp_digit* a, sp_digit* m)
 {
    (void)m;

@@ -12258,7 +12348,7 @@ void sp_256_mont_tpl_8(sp_digit* r, sp_digit* a, sp_digit* m)
 * b   Number to subtract with in Montogmery form.
 * m   Modulus (prime).
 */
-void sp_256_mont_sub_8(sp_digit* r, sp_digit* a, sp_digit* b,
+SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, sp_digit* a, sp_digit* b,
        sp_digit* m)
 {
    (void)m;
@@ -12335,7 +12425,7 @@ void sp_256_mont_sub_8(sp_digit* r, sp_digit* a, sp_digit* b,
 * a  Number to divide.
 * m  Modulus (prime).
 */
-void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m)
+SP_NOINLINE static void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m)
 {
    __asm__ __volatile__ (
        "ldr	r7, [%[a], #0]\n\t"
@@ -12422,7 +12512,7 @@ void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m)
 * p  Point to double.
 * t  Temporary ordinate data.
 */
-void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p, sp_digit* t)
+static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p, sp_digit* t)
 {
    sp_point* rp[2];
    sp_digit* t1 = t;
@@ -12532,7 +12622,7 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
 * a  A single precision integer.
 * b  A single precision integer.
 */
-sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
+SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
 {
    sp_digit c = 0;
@@ -12600,7 +12690,7 @@ static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
 * q  Second point to add.
 * t  Temporary ordinate data.
 */
-void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q,
+static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q,
        sp_digit* t)
 {
    sp_point* ap[2];
@@ -12889,6 +12979,28 @@ static void sp_256_proj_point_dbl_n_8(sp_point* r, sp_point* p, int n,
    sp_256_div2_8(y, y, p256_mod);
 }

+/* Convert the projective point to affine.
+ * Ordinates are in Montgomery form.
+ *
+ * a  Point to convert; x, y and z are overwritten in place.
+ * t  Temporary data.
+ */
+static void sp_256_proj_to_affine_8(sp_point* a, sp_digit* t)
+{
+    sp_digit* t1 = t;
+    sp_digit* t2 = t + 2 * 8;
+    sp_digit* tmp = t + 4 * 8;
+
+    sp_256_mont_inv_8(t1, a->z, tmp); /* t1 = 1/z */
+
+    sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod); /* t2 = 1/z^2 */
+    sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod); /* t1 = 1/z^3 */
+
+    sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod); /* x = x/z^2 */
+    sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod); /* y = y/z^3 */
+    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
+}
+
 #endif /* FP_ECC */
 /* Add two Montgomery form projective points. The second point has a q value of
 * one.
@@ -12899,7 +13011,7 @@ static void sp_256_proj_point_dbl_n_8(sp_point* r, sp_point* p, int n,
 * q  Second point to add.
 * t  Temporary ordinate data.
 */
-void sp_256_proj_point_add_qz1_8(sp_point* r, sp_point* p,
+static void sp_256_proj_point_add_qz1_8(sp_point* r, sp_point* p,
        sp_point* q, sp_digit* t)
 {
    sp_point* ap[2];
@@ -12969,28 +13081,6 @@ void sp_256_proj_point_add_qz1_8(sp_point* r, sp_point* p,

 #ifdef WOLFSSL_SP_SMALL
 #ifdef FP_ECC
-/* Convert the projective point to affine.
- * Ordinates are in Montgomery form.
- *
- * a  Point to convert.
- * t  Temprorary data.
- */
-static void sp_256_proj_to_affine_8(sp_point* a, sp_digit* t)
-{
-    sp_digit* t1 = t;
-    sp_digit* t2 = t + 2 * 8;
-    sp_digit* tmp = t + 4 * 8;
-
-    sp_256_mont_inv_8(t1, a->z, tmp);
-
-    sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
-
-    sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
-    sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
-    XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
-}
-
 /* Generate the pre-computed table of points for the base point.
 *
 * a      The base point.
@@ -13370,7 +13460,7 @@ static int sp_256_gen_stripe_table_8(sp_point* a,
 * heap  Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
-int sp_256_ecc_mulmod_stripe_8(sp_point* r, sp_point* g,
+static int sp_256_ecc_mulmod_stripe_8(sp_point* r, sp_point* g,
        sp_table_entry* table, sp_digit* k, int map, void* heap)
 {
 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
@@ -13415,15 +13505,15 @@ int sp_256_ecc_mulmod_stripe_8(sp_point* r, sp_point* g,
            for (j=0,x=i; j<8; j++,x+=32)
                y |= ((k[x / 32] >> (x % 32)) & 1) << j;

-            sp_256_proj_point_dbl_8(rt, rt, td);
+            sp_256_proj_point_dbl_8(rt, rt, t);
            XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
            XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
            p->infinity = table[y].infinity;
-            sp_256_proj_point_add_qz1_8(rt, rt, p, td);
+            sp_256_proj_point_add_qz1_8(rt, rt, p, t);
        }

        if (map)
-            sp_256_map_8(r, rt, td);
+            sp_256_map_8(r, rt, t);
        else
            XMEMCPY(r, rt, sizeof(sp_point));
    }
@@ -15278,7 +15368,7 @@ static sp_table_entry p256_table[256] = {
 * heap  Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
-int sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k,
+static int sp_256_ecc_mulmod_base_8(sp_point* r, sp_digit* k,
        int map, void* heap)
 {
    return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
@@ -15333,7 +15423,7 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap)
    return err;
 }

-#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
+#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN)
 /* Returns 1 if the number of zero.
 * Implementation is constant time.
 *
@@ -15345,7 +15435,7 @@ static int sp_256_iszero_8(const sp_digit* a)
    return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
 }

-#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
+#endif /* WOLFSSL_VALIDATE_ECC_KEYGEN || HAVE_ECC_SIGN */
 /* Add 1 to a. (a = a + 1)
 *
 * a  A single precision integer.
@@ -15864,7 +15954,7 @@ static WC_INLINE int sp_256_div_8(sp_digit* a, sp_digit* d, sp_digit* m,
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
-int sp_256_mod_8(sp_digit* r, sp_digit* a, sp_digit* m)
+static WC_INLINE int sp_256_mod_8(sp_digit* r, sp_digit* a, sp_digit* m)
 {
    return sp_256_div_8(a, m, NULL, r);
 }
@@ -15890,7 +15980,7 @@ static const uint32_t p256_order_low[4] = {
 * a  First operand of the multiplication.
 * b  Second operand of the multiplication.
 */
-void sp_256_mont_mul_order_8(sp_digit* r, sp_digit* a, sp_digit* b)
+static void sp_256_mont_mul_order_8(sp_digit* r, sp_digit* a, sp_digit* b)
 {
    sp_256_mul_8(r, a, b);
    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
@@ -15901,7 +15991,7 @@ void sp_256_mont_mul_order_8(sp_digit* r, sp_digit* a, sp_digit* b)
 * r  Result of the squaring.
 * a  Number to square.
 */
-void sp_256_mont_sqr_order_8(sp_digit* r, sp_digit* a)
+static void sp_256_mont_sqr_order_8(sp_digit* r, sp_digit* a)
 {
    sp_256_sqr_8(r, a);
    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
@@ -15914,7 +16004,7 @@ void sp_256_mont_sqr_order_8(sp_digit* r, sp_digit* a)
 * r  Result of the squaring.
 * a  Number to square.
 */
-void sp_256_mont_sqr_n_order_8(sp_digit* r, sp_digit* a, int n)
+static void sp_256_mont_sqr_n_order_8(sp_digit* r, sp_digit* a, int n)
 {
    int i;

@@ -16102,6 +16192,7 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,

    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
        sp_256_from_mp(x, 8, priv);
+
        /* New random point. */
        err = sp_256_ecc_gen_k_8(rng, k);
        if (err == MP_OKAY) {
@@ -39,9 +39,7 @@
                                    defined(WOLFSSL_HAVE_SP_ECC)

 #ifdef RSA_LOW_MEM
-#ifndef SP_RSA_PRIVATE_EXP_D
 #define SP_RSA_PRIVATE_EXP_D
-#endif

 #ifndef WOLFSSL_SP_SMALL
 #define WOLFSSL_SP_SMALL
@@ -341,8 +339,9 @@ SP_NOINLINE static void sp_2048_sqr_avx2_32(sp_digit* r, const sp_digit* a)
 }
 #endif /* HAVE_INTEL_AVX2 */

-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+       !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */

 /* Caclulate the bottom digit of -1/a mod 2^n.
 *
@@ -364,7 +363,9 @@ static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho)
    *rho = -x;
 }

-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+extern void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, const sp_digit b);
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+       !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 extern sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b);
 /* r = 2^n mod m where n is the number of bits to reduce by.
 * Given m must be 2048 bits, just need to subtract.
@@ -817,7 +818,7 @@ static int sp_2048_mod_exp_avx2_16(sp_digit* r, sp_digit* a, sp_digit* e,
 }
 #endif /* HAVE_INTEL_AVX2 */

-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */

 #ifdef WOLFSSL_HAVE_SP_DH
 /* r = 2^n mod m where n is the number of bits to reduce by.
@@ -867,7 +868,7 @@ static void sp_2048_mont_sqr_32(sp_digit* r, sp_digit* a, sp_digit* m,
    sp_2048_mont_reduce_32(r, m, mp);
 }

-extern void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, const sp_digit b);
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
 extern void sp_2048_mul_d_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit b);
 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
 *
@@ -976,6 +977,7 @@ static WC_INLINE int sp_2048_mod_32(sp_digit* r, sp_digit* a, sp_digit* m)
    return sp_2048_div_32(a, m, NULL, r);
 }

+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
 /* Divide d in a and put remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
@@ -1671,7 +1673,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm,
    return err;
 }
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 /* Convert an array of sp_digit to an mp_int.
 *
 * a  A single precision integer.
@@ -1830,9 +1832,56 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,

    return err;
 }
-#endif /* WOLFSSL_HAVE_SP_DH */
+/* Perform the 1024-bit modular exponentiation: res = (base ^ exp) mod mod.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if base or exp is longer than 1024 bits or
+ * mod is not exactly 1024 bits, and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[32], e[16], m[16];
+    sp_digit* r = b; /* result is written over the base buffer */
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+    int expBits = mp_count_bits(exp);

-#endif /* !WOLFSSL_SP_NO_2048 */
+    if (mp_count_bits(base) > 1024 || expBits > 1024 ||
+                                                   mp_count_bits(mod) != 1024) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_2048_from_mp(b, 16, base);
+        sp_2048_from_mp(e, 16, exp);
+        sp_2048_from_mp(m, 16, mod);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_2048_mod_exp_avx2_16(r, b, e, expBits, m, 0);
+        else
+#endif
+            err = sp_2048_mod_exp_16(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 16, 0, sizeof(*r) * 16); /* zero upper 16 digits of r */
+        err = sp_2048_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e)); /* clear the local copy of the exponent */
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
+
+#endif /* !WOLFSSL_SP_NO_2048 */

 #ifndef WOLFSSL_SP_NO_3072
 /* Read big endian unsigned byte aray into r.
@@ -2123,8 +2172,9 @@ SP_NOINLINE static void sp_3072_sqr_avx2_48(sp_digit* r, const sp_digit* a)
 }
 #endif /* HAVE_INTEL_AVX2 */

-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+       !defined(WOLFSSL_RSA_PUBLIC_ONLY)
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */

 /* Caclulate the bottom digit of -1/a mod 2^n.
 *
@@ -2146,7 +2196,9 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho)
    *rho = -x;
 }

-#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA)
+extern void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, const sp_digit b);
+#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \
+       !defined(WOLFSSL_RSA_PUBLIC_ONLY)
 extern sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b);
 /* r = 2^n mod m where n is the number of bits to reduce by.
 * Given m must be 3072 bits, just need to subtract.
@@ -2599,7 +2651,7 @@ static int sp_3072_mod_exp_avx2_24(sp_digit* r, sp_digit* a, sp_digit* e,
 }
 #endif /* HAVE_INTEL_AVX2 */

-#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA */
+#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */

 #ifdef WOLFSSL_HAVE_SP_DH
 /* r = 2^n mod m where n is the number of bits to reduce by.
@@ -2649,7 +2701,7 @@ static void sp_3072_mont_sqr_48(sp_digit* r, sp_digit* a, sp_digit* m,
    sp_3072_mont_reduce_48(r, m, mp);
 }

-extern void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, const sp_digit b);
+#ifndef WOLFSSL_RSA_PUBLIC_ONLY
 extern void sp_3072_mul_d_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit b);
 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
 *
@@ -2758,6 +2810,7 @@ static WC_INLINE int sp_3072_mod_48(sp_digit* r, sp_digit* a, sp_digit* m)
    return sp_3072_div_48(a, m, NULL, r);
 }

+#endif /* !WOLFSSL_RSA_PUBLIC_ONLY */
 /* Divide d in a and put remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
@@ -3453,7 +3506,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm,
    return err;
 }
 #endif /* WOLFSSL_HAVE_SP_RSA */
-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
 /* Convert an array of sp_digit to an mp_int.
 *
 * a  A single precision integer.
@@ -3612,9 +3665,56 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,

    return err;
 }
-#endif /* WOLFSSL_HAVE_SP_DH */
+/* Perform the 1536-bit modular exponentiation: res = (base ^ exp) mod mod.
+ *
+ * base  Base. MP integer.
+ * exp   Exponent. MP integer.
+ * mod   Modulus. MP integer.
+ * res   Result. MP integer.
+ * returns 0 on success, MP_READ_E if base or exp is longer than 1536 bits or
+ * mod is not exactly 1536 bits, and MEMORY_E if memory allocation fails.
+ */
+int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res)
+{
+    int err = MP_OKAY;
+    sp_digit b[48], e[24], m[24];
+    sp_digit* r = b; /* result is written over the base buffer */
+#ifdef HAVE_INTEL_AVX2
+    word32 cpuid_flags = cpuid_get_flags();
+#endif
+    int expBits = mp_count_bits(exp);

-#endif /* !WOLFSSL_SP_NO_3072 */
+    if (mp_count_bits(base) > 1536 || expBits > 1536 ||
+                                                   mp_count_bits(mod) != 1536) {
+        err = MP_READ_E;
+    }
+
+    if (err == MP_OKAY) {
+        sp_3072_from_mp(b, 24, base);
+        sp_3072_from_mp(e, 24, exp);
+        sp_3072_from_mp(m, 24, mod);
+
+#ifdef HAVE_INTEL_AVX2
+        if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags))
+            err = sp_3072_mod_exp_avx2_24(r, b, e, expBits, m, 0);
+        else
+#endif
+            err = sp_3072_mod_exp_24(r, b, e, expBits, m, 0);
+    }
+
+    if (err == MP_OKAY) {
+        XMEMSET(r + 24, 0, sizeof(*r) * 24); /* zero upper 24 digits of r */
+        err = sp_3072_to_mp(r, res);
+    }
+
+    XMEMSET(e, 0, sizeof(e)); /* clear the local copy of the exponent */
+
+    return err;
+}
+
+#endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
+
+#endif /* !WOLFSSL_SP_NO_3072 */

 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH */
 #ifdef WOLFSSL_HAVE_SP_ECC
@@ -20266,10 +20366,11 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv,
            hashLen = 32;

        sp_256_from_bin(e, 4, hash, hashLen);
-        sp_256_from_mp(x, 4, priv);
    }

    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
+        sp_256_from_mp(x, 4, priv);
+
        /* New random point. */
        err = sp_256_ecc_gen_k_4(rng, k);
        if (err == MP_OKAY) {
@@ -21130,7 +21231,7 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
    return err;
 }
 #endif
-#endif /* !WOLFSSL_SP_NO_256 */
+#endif /* !WOLFSSL_SP_NO_256 */
 #endif /* WOLFSSL_HAVE_SP_ECC */
 #endif /* WOLFSSL_SP_X86_64_ASM */
 #endif /* WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_ECC */
@@ -58,6 +58,18 @@
    #include <stdio.h>
 #endif

+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+#endif
+
+

 /* math settings check */
 word32 CheckRunTimeSettings(void)
@@ -3484,7 +3496,6 @@ int mp_prime_is_prime(mp_int* a, int t, int* result)
    return fp_isprime_ex(a, t, result);
 }

-
 /* Miller-Rabin test of "a" to the base of "b" as described in
 * HAC pp. 139 Algorithm 4.24
 *
@@ -3526,7 +3537,14 @@ static int fp_prime_miller_rabin_ex(fp_int * a, fp_int * b, int *result,

  /* compute y = b**r mod a */
  fp_zero(y);
-  fp_exptmod(b, r, a, y);
+#if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
+  if (fp_count_bits(a) == 1024)
+      sp_ModExp_1024(b, r, a, y);
+  else if (fp_count_bits(a) == 2048)
+      sp_ModExp_2048(b, r, a, y);
+  else
+#endif
+      fp_exptmod(b, r, a, y);

  /* if y != 1 and y != n1 do */
  if (fp_cmp_d (y, 1) != FP_EQ && fp_cmp (y, n1) != FP_EQ) {
@@ -64,13 +64,21 @@ WOLFSSL_LOCAL int sp_RsaPrivate_3072(const byte* in, word32 inLen,

 #endif /* WOLFSSL_HAVE_SP_RSA */

-#ifdef WOLFSSL_HAVE_SP_DH
+#if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)

+WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
+WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
+    mp_int* res);
 WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
    mp_int* res);
 WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
    mp_int* res);

+#endif
+
+#ifdef WOLFSSL_HAVE_SP_DH
+
 WOLFSSL_LOCAL int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen,
    mp_int* mod, byte* out, word32* outLen);
 WOLFSSL_LOCAL int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen,