Merge pull request #547 from toddouska/mathca

Remove timing resistant cache key bit monitor leaks
2016-09-13 14:34:23 -07:00
parent 05d78dc2ce 46a0ee8e69
commit b77c350153
3 changed files with 87 additions and 3 deletions
--- a/wolfcrypt/src/ecc.c
+++ b/wolfcrypt/src/ecc.c
@@ -1876,6 +1876,27 @@ int wc_ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* a,

 #else /* ECC_TIMING_RESISTANT */

+
+#if defined(TFM_TIMING_RESISTANT) && defined(USE_FAST_MATH)
+    /* tfm.c already defines this all-off / all-on mask table; share it */
+    extern const wolfssl_word wc_off_on_addr[2];
+#else
+    static const wolfssl_word wc_off_on_addr[2] =
+    {
+    #if defined(WC_64BIT_CPU)
+        W64LIT(0x0000000000000000),
+        W64LIT(0xffffffffffffffff)
+    #elif defined(WC_16BIT_CPU)
+        0x0000U,
+        0xffffU
+    #else
+        /* 32 bit */
+        0x00000000U,
+        0xffffffffU
+    #endif
+    };
+#endif
+
 /**
   Perform a point multiplication  (timing resistant)
   k    The scalar to multiply by
@@ -2013,8 +2034,42 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R,
           if (err == MP_OKAY)
               err = ecc_projective_add_point(M[0], M[1], M[i^1], a, modulus,
                                                                       mp);
+            /* instead of using M[i] for double, which leaks key bit to cache
+             * monitor, use M[2] as temp, make sure address calc is constant,
+             * keep &M[0] and &M[1] in cache */
           if (err == MP_OKAY)
-               err = ecc_projective_dbl_point(M[i], M[i], a, modulus, mp);
+               err = mp_copy((mp_int*)
+                             ( ((wolfssl_word)&M[0]->x & wc_off_on_addr[i^1]) +
+                               ((wolfssl_word)&M[1]->x & wc_off_on_addr[i])),
+                             M[2]->x);
+           if (err == MP_OKAY)
+               err = mp_copy((mp_int*)
+                             ( ((wolfssl_word)&M[0]->y & wc_off_on_addr[i^1]) +
+                               ((wolfssl_word)&M[1]->y & wc_off_on_addr[i])),
+                             M[2]->y);
+           if (err == MP_OKAY)
+               err = mp_copy((mp_int*)
+                             ( ((wolfssl_word)&M[0]->z & wc_off_on_addr[i^1]) +
+                               ((wolfssl_word)&M[1]->z & wc_off_on_addr[i])),
+                             M[2]->z);
+           if (err == MP_OKAY)
+               err = ecc_projective_dbl_point(M[2], M[2], a, modulus, mp);
+           /* copy M[2] back to M[i] */
+           if (err == MP_OKAY)
+               err = mp_copy(M[2]->x,
+                             (mp_int*)
+                             ( ((wolfssl_word)&M[0]->x & wc_off_on_addr[i^1]) +
+                               ((wolfssl_word)&M[1]->x & wc_off_on_addr[i])) );
+           if (err == MP_OKAY)
+               err = mp_copy(M[2]->y,
+                             (mp_int*)
+                             ( ((wolfssl_word)&M[0]->y & wc_off_on_addr[i^1]) +
+                               ((wolfssl_word)&M[1]->y & wc_off_on_addr[i])) );
+           if (err == MP_OKAY)
+               err = mp_copy(M[2]->z,
+                             (mp_int*)
+                             ( ((wolfssl_word)&M[0]->z & wc_off_on_addr[i^1]) +
+                               ((wolfssl_word)&M[1]->z & wc_off_on_addr[i])) );
           if (err != MP_OKAY)
               break;
       } /* end for */
--- a/wolfcrypt/src/tfm.c
+++ b/wolfcrypt/src/tfm.c
@@ -1035,13 +1035,30 @@ int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)

 #ifdef TFM_TIMING_RESISTANT

+/* masks for branch-free pointer selection: [0] = all off, [1] = all on */
+/* ecc.c uses same table */
+const wolfssl_word wc_off_on_addr[2] =
+{
+#if defined(WC_64BIT_CPU)
+    W64LIT(0x0000000000000000),
+    W64LIT(0xffffffffffffffff)
+#elif defined(WC_16BIT_CPU)
+    0x0000U,
+    0xffffU
+#else
+    /* 32 bit */
+    0x00000000U,
+    0xffffffffU
+#endif
+};
+
 /* timing resistant montgomery ladder based exptmod
   Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
   Cryptographic Hardware and Embedded Systems, CHES 2002
 */
 static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
 {
-  fp_int   R[2];
+  fp_int   R[3];
  fp_digit buf, mp;
  int      err, bitcnt, digidx, y;

@@ -1052,6 +1069,7 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)

  fp_init(&R[0]);
  fp_init(&R[1]);
+  fp_init(&R[2]);

  /* now we need R mod m */
  fp_montgomery_calc_normalization (&R[0], P);
@@ -1092,7 +1110,17 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)

    /* do ops */
    fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
-    fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
+
+    /* instead of using R[y] for sqr, which leaks key bit to cache monitor,
+     * use R[2] as temp, make sure address calc is constant, keep
+     * &R[0] and &R[1] in cache */
+    fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
+                        ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ),
+            &R[2]);
+    fp_sqr(&R[2], &R[2]);          fp_montgomery_reduce(&R[2], P, mp);
+    fp_copy(&R[2],
+            (fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) +
+                        ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ) );
  }

   fp_montgomery_reduce(&R[0], P, mp);
--- a/wolfssl/wolfcrypt/types.h
+++ b/wolfssl/wolfcrypt/types.h
@@ -91,6 +91,7 @@
 	     defined(__mips64)  || defined(__x86_64__) || defined(_M_X64)) || \
         defined(__aarch64__)
 	    typedef word64 wolfssl_word;
+        #define WC_64BIT_CPU
 	#else
 	    typedef word32 wolfssl_word;
 	    #ifdef WORD64_AVAILABLE