Fixes to fp_mul and fp_sqr to clear any excess digits on the destination. Added compile-time checks to confirm FP_SIZE is large enough for each enabled TFM_ acceleration define. Updated comments in other places where excess digits are cleared (including fp_div).

2016-05-04 23:19:24 -07:00
parent 7c3fbd7644
commit a5d27853fa
2 changed files with 99 additions and 140 deletions
--- a/wolfcrypt/src/tfm.c
+++ b/wolfcrypt/src/tfm.c
@@ -118,6 +118,8 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c)
  }

  c->used = x;
+
+  /* zero any excess digits on the destination that we didn't write to */
  for (; x < oldused; x++) {
     c->dp[x] = 0;
  }
@@ -179,6 +181,8 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
     c->dp[x]  = (fp_digit)t;
     t         = (t >> DIGIT_BIT)&1;
   }
+
+  /* zero any excess digits on the destination that we didn't write to */
  for (; x < oldused; x++) {
     c->dp[x] = 0;
  }
@@ -188,7 +192,9 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
 /* c = a * b */
 void fp_mul(fp_int *A, fp_int *B, fp_int *C)
 {
-    int   y, yy;
+    int   y, yy, oldused;
+
+    oldused = C->used;

    y  = MAX(A->used, B->used);
    yy = MIN(A->used, B->used);
@@ -196,7 +202,7 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
    /* call generic if we're out of range */
    if (y + yy > FP_SIZE) {
       fp_mul_comba(A, B, C);
-       return ;
+       goto clean;
    }

    /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
@@ -205,98 +211,104 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C)
       if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
    */

-#ifdef TFM_MUL3
+#if defined(TFM_MUL3) && FP_SIZE >= 6
        if (y <= 3) {
           fp_mul_comba3(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#ifdef TFM_MUL4
+#if defined(TFM_MUL4) && FP_SIZE >= 8
        if (y == 4) {
           fp_mul_comba4(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#ifdef TFM_MUL6
+#if defined(TFM_MUL6) && FP_SIZE >= 12
        if (y <= 6) {
           fp_mul_comba6(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#ifdef TFM_MUL7
+#if defined(TFM_MUL7) && FP_SIZE >= 14
        if (y == 7) {
           fp_mul_comba7(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#ifdef TFM_MUL8
+#if defined(TFM_MUL8) && FP_SIZE >= 16
        if (y == 8) {
           fp_mul_comba8(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#ifdef TFM_MUL9
+#if defined(TFM_MUL9) && FP_SIZE >= 18
        if (y == 9) {
           fp_mul_comba9(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#ifdef TFM_MUL12
+#if defined(TFM_MUL12) && FP_SIZE >= 24
        if (y <= 12) {
           fp_mul_comba12(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#ifdef TFM_MUL17
+#if defined(TFM_MUL17) && FP_SIZE >= 34
        if (y <= 17) {
           fp_mul_comba17(A,B,C);
-           return;
+           goto clean;
        }
 #endif

-#ifdef TFM_SMALL_SET
+#if defined(TFM_SMALL_SET) && FP_SIZE >= 32
        if (y <= 16) {
           fp_mul_comba_small(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_MUL20)
+#if defined(TFM_MUL20) && FP_SIZE >= 40
        if (y <= 20) {
           fp_mul_comba20(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_MUL24)
+#if defined(TFM_MUL24) && FP_SIZE >= 48
        if (yy >= 16 && y <= 24) {
           fp_mul_comba24(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_MUL28)
+#if defined(TFM_MUL28) && FP_SIZE >= 56
        if (yy >= 20 && y <= 28) {
           fp_mul_comba28(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_MUL32)
+#if defined(TFM_MUL32) && FP_SIZE >= 64
        if (yy >= 24 && y <= 32) {
           fp_mul_comba32(A,B,C);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_MUL48)
+#if defined(TFM_MUL48) && FP_SIZE >= 96
        if (yy >= 40 && y <= 48) {
-           fp_mul_comba48(A,B,C);
-           return;
+          fp_mul_comba48(A,B,C);
+          goto clean;
        }
 #endif
-#if defined(TFM_MUL64)
+#if defined(TFM_MUL64) && FP_SIZE >= 128
        if (yy >= 56 && y <= 64) {
           fp_mul_comba64(A,B,C);
-           return;
+           goto clean;
        }
 #endif
        fp_mul_comba(A,B,C);
+
+clean:
+    /* zero any excess digits on the destination that we didn't write to */
+    for (y = C->used; y < oldused; y++) {
+        C->dp[y] = 0;
+    }
 }

 void fp_mul_2(fp_int * a, fp_int * b)
@@ -340,9 +352,7 @@ void fp_mul_2(fp_int * a, fp_int * b)
      ++(b->used);
    }

-    /* now zero any excess digits on the destination
-     * that we didn't write to
-     */
+    /* zero any excess digits on the destination that we didn't write to */
    tmpb = b->dp + b->used;
    for (x = b->used; x < oldused; x++) {
      *tmpb++ = 0;
@@ -370,6 +380,8 @@ void fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
      c->dp[c->used++] = (fp_digit) w;
      ++x;
   }
+
+   /* zero any excess digits on the destination that we didn't write to */
   for (; x < oldused; x++) {
      c->dp[x] = 0;
   }
@@ -627,9 +639,7 @@ int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
  if (d != NULL) {
    fp_div_2d (&x, norm, &x, NULL);

-/* the following is a kludge, essentially we were seeing the right remainder but
-   with excess digits that should have been zero
- */
+    /* zero any excess digits on the destination that we didn't write to */
    for (i = b->used; i < x.used; i++) {
        x.dp[i] = 0;
    }
@@ -669,7 +679,7 @@ void fp_div_2(fp_int * a, fp_int * b)
      r = rr;
    }

-    /* zero excess digits */
+    /* zero any excess digits on the destination that we didn't write to */
    tmpb = b->dp + b->used;
    for (x = b->used; x < oldused; x++) {
      *tmpb++ = 0;
@@ -1267,105 +1277,114 @@ void fp_2expt(fp_int *a, int b)
 /* b = a*a  */
 void fp_sqr(fp_int *A, fp_int *B)
 {
-    int y = A->used;
+    int y, oldused;
+
+    oldused = B->used;
+    y = A->used;

    /* call generic if we're out of range */
    if (y + y > FP_SIZE) {
       fp_sqr_comba(A, B);
-       return ;
+       goto clean;
    }

-#if defined(TFM_SQR3)
+#if defined(TFM_SQR3) && FP_SIZE >= 6
        if (y <= 3) {
           fp_sqr_comba3(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR4)
+#if defined(TFM_SQR4) && FP_SIZE >= 8
        if (y == 4) {
           fp_sqr_comba4(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR6)
+#if defined(TFM_SQR6) && FP_SIZE >= 12
        if (y <= 6) {
           fp_sqr_comba6(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR7)
+#if defined(TFM_SQR7) && FP_SIZE >= 14
        if (y == 7) {
           fp_sqr_comba7(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR8)
+#if defined(TFM_SQR8) && FP_SIZE >= 16
        if (y == 8) {
           fp_sqr_comba8(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR9)
+#if defined(TFM_SQR9) && FP_SIZE >= 18
        if (y == 9) {
           fp_sqr_comba9(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR12)
+#if defined(TFM_SQR12) && FP_SIZE >= 24
        if (y <= 12) {
           fp_sqr_comba12(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR17)
+#if defined(TFM_SQR17) && FP_SIZE >= 34
        if (y <= 17) {
           fp_sqr_comba17(A,B);
-           return;
+           goto clean;
        }
 #endif
 #if defined(TFM_SMALL_SET)
        if (y <= 16) {
           fp_sqr_comba_small(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR20)
+#if defined(TFM_SQR20) && FP_SIZE >= 40
        if (y <= 20) {
           fp_sqr_comba20(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR24)
+#if defined(TFM_SQR24) && FP_SIZE >= 48
        if (y <= 24) {
           fp_sqr_comba24(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR28)
+#if defined(TFM_SQR28) && FP_SIZE >= 56
        if (y <= 28) {
           fp_sqr_comba28(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR32)
+#if defined(TFM_SQR32) && FP_SIZE >= 64
        if (y <= 32) {
           fp_sqr_comba32(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR48)
+#if defined(TFM_SQR48) && FP_SIZE >= 96
        if (y <= 48) {
           fp_sqr_comba48(A,B);
-           return;
+           goto clean;
        }
 #endif
-#if defined(TFM_SQR64)
+#if defined(TFM_SQR64) && FP_SIZE >= 128
        if (y <= 64) {
           fp_sqr_comba64(A,B);
-           return;
+           goto clean;
        }
 #endif
       fp_sqr_comba(A, B);
+
+clean:
+  /* zero any excess digits on the destination that we didn't write to */
+  for (y = B->used; y < oldused; y++) {
+    B->dp[y] = 0;
+  }
 }

 /* generic comba squarer */
@@ -1652,7 +1671,8 @@ static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp)
     *tmpm++ = *_c++;
  }

-  for (; x < oldused; x++)   {
+  /* zero any excess digits on the destination that we didn't write to */
+  for (; x < oldused; x++) {
     *tmpm++ = 0;
  }

@@ -1733,7 +1753,8 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
     *tmpm++ = *_c++;
  }

-  for (; x < oldused; x++)   {
+  /* zero any excess digits on the destination that we didn't write to */
+  for (; x < oldused; x++) {
     *tmpm++ = 0;
  }

--- a/wolfssl/wolfcrypt/tfm.h
+++ b/wolfssl/wolfcrypt/tfm.h
@@ -211,6 +211,7 @@
 #if defined(FP_64BIT)
   /* for GCC only on supported platforms */
   typedef unsigned long long fp_digit;   /* 64bit, 128 uses mode(TI) below */
+   #define SIZEOF_FP_DIGIT 8
   typedef unsigned long      fp_word __attribute__ ((mode(TI)));
 #else
   #if defined(_MSC_VER) || defined(__BORLANDC__)
@@ -221,12 +222,14 @@

   #ifndef NO_64BIT
      typedef unsigned int       fp_digit;
+      #define SIZEOF_FP_DIGIT 4
      typedef ulong64            fp_word;
      #define FP_32BIT
   #else
      /* some procs like coldfire prefer not to place multiply into 64bit type
         even though it exists */
      typedef unsigned short     fp_digit;
+      #define SIZEOF_FP_DIGIT 2
      typedef unsigned int       fp_word;
   #endif
 #endif
@@ -234,7 +237,7 @@
 #endif /* WOLFSSL_BIGINT_TYPES */

 /* # of digits this is */
-#define DIGIT_BIT  (int)((CHAR_BIT) * sizeof(fp_digit))
+#define DIGIT_BIT   ((CHAR_BIT) * SIZEOF_FP_DIGIT)

 /* Max size of any number in bits.  Basically the largest size you will be
 * multiplying should be half [or smaller] of FP_MAX_SIZE-four_digit
@@ -548,103 +551,38 @@ void fp_reverse(unsigned char *s, int len);

 void fp_mul_comba(fp_int *a, fp_int *b, fp_int *c);

-#ifdef TFM_SMALL_SET
 void fp_mul_comba_small(fp_int *a, fp_int *b, fp_int *c);
-#endif
-
-#ifdef TFM_MUL3
 void fp_mul_comba3(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL4
 void fp_mul_comba4(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL6
 void fp_mul_comba6(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL7
 void fp_mul_comba7(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL8
 void fp_mul_comba8(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL9
 void fp_mul_comba9(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL12
 void fp_mul_comba12(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL17
 void fp_mul_comba17(fp_int *a, fp_int *b, fp_int *c);
-#endif
-
-#ifdef TFM_MUL20
 void fp_mul_comba20(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL24
 void fp_mul_comba24(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL28
 void fp_mul_comba28(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL32
 void fp_mul_comba32(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL48
 void fp_mul_comba48(fp_int *a, fp_int *b, fp_int *c);
-#endif
-#ifdef TFM_MUL64
 void fp_mul_comba64(fp_int *a, fp_int *b, fp_int *c);
-#endif
-
 void fp_sqr_comba(fp_int *a, fp_int *b);
-
-#ifdef TFM_SMALL_SET
 void fp_sqr_comba_small(fp_int *a, fp_int *b);
-#endif
-
-#ifdef TFM_SQR3
 void fp_sqr_comba3(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR4
 void fp_sqr_comba4(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR6
 void fp_sqr_comba6(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR7
 void fp_sqr_comba7(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR8
 void fp_sqr_comba8(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR9
 void fp_sqr_comba9(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR12
 void fp_sqr_comba12(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR17
 void fp_sqr_comba17(fp_int *a, fp_int *b);
-#endif
-
-#ifdef TFM_SQR20
 void fp_sqr_comba20(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR24
 void fp_sqr_comba24(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR28
 void fp_sqr_comba28(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR32
 void fp_sqr_comba32(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR48
 void fp_sqr_comba48(fp_int *a, fp_int *b);
-#endif
-#ifdef TFM_SQR64
 void fp_sqr_comba64(fp_int *a, fp_int *b);
-#endif
+
 /*extern const char *fp_s_rmap;*/