hwcrypto bignum: Use mbedtls_mpi data structures for all bignum data

Still doesn't solve the problem of multiplying two numbers where one is
>2048 bits, needed for RSA support.
This commit is contained in:
Angus Gratton
2016-09-19 18:00:03 +10:00
parent 58aec93dbb
commit 1a6dd44d03
2 changed files with 58 additions and 81 deletions

View File

@@ -207,98 +207,80 @@ static int mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N, mbedtls_mpi_uint m
return( mpi_montmul( A, &U, N, mm, T ) ); return( mpi_montmul( A, &U, N, mm, T ) );
} }
#if defined(MBEDTLS_MPI_MUL_MPI_ALT) /* MBEDTLS_MPI_MUL_MPI_ALT */
/* Allocate parameters used by hardware MPI multiply, /* Number of words used to hold 'mpi', rounded up to nearest
and copy mbedtls_mpi structures into them */ 16 words (512 bits) to match hardware support
static int mul_pram_alloc(const mbedtls_mpi *A, const mbedtls_mpi *B, char **pA, char **pB, char **pX, size_t *bites) */
static inline size_t hardware_words_needed(const mbedtls_mpi *mpi)
{ {
char *sa, *sb, *sx; size_t res;
// int algn; for(res = mpi->n; res > 0; res-- ) {
int words, bytes; if( mpi->p[res - 1] != 0 )
int abytes, bbytes; break;
}
if (A->n > B->n) res = (res + 0xF) & ~0xF;
words = A->n; return res;
else
words = B->n;
bytes = (words / 16 + ((words % 16) ? 1 : 0 )) * 16 * 4 * 2;
abytes = A->n * 4;
bbytes = B->n * 4;
sa = malloc(bytes);
if (!sa) {
return -1;
}
sb = malloc(bytes);
if (!sb) {
free(sa);
return -1;
}
sx = malloc(bytes);
if (!sx) {
free(sa);
free(sb);
return -1;
}
memcpy(sa, A->p, abytes);
memset(sa + abytes, 0, bytes - abytes);
memcpy(sb, B->p, bbytes);
memset(sb + bbytes, 0, bytes - bbytes);
*pA = sa;
*pB = sb;
*pX = sx;
*bites = bytes * 4;
return 0;
} }
#if defined(MBEDTLS_MPI_MUL_MPI_ALT)
int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B ) int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
{ {
int ret = -1; int ret = -1;
size_t i, j; size_t words_a, words_b, words_x, words_mult;
char *s1 = NULL, *s2 = NULL, *dest = NULL;
size_t bites;
mbedtls_mpi TA, TB; mbedtls_mpi TA, TB;
mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB ); mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB );
if( X == A ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) ); A = &TA; } /* Count words needed for A & B in hardware */
if( X == B ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) ); B = &TB; } words_a = hardware_words_needed(A);
words_b = hardware_words_needed(B);
for( i = A->n; i > 0; i-- ) /* Take a copy of A if either X == A OR if A isn't long enough
if( A->p[i - 1] != 0 ) to hold the number of words needed for hardware.
break;
for( j = B->n; j > 0; j-- ) (can't grow A directly as it is const)
if( B->p[j - 1] != 0 )
break;
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) ); TODO: growing the input operands is only necessary because the
ROM functions only take one length argument. It should be
possible for us to just copy the used data only into the
hardware buffers, and set the remaining bits to zero - saving
RAM. But we need to reimplement ets_bigint_mult_prepare() in
software for this.
*/
if( X == A || A->n < words_a) {
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &TA, words_a) );
A = &TA;
}
/* Same for B */
if( X == B || B->n < words_b ) {
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &TB, words_b) );
B = &TB;
}
/* Result X has to have room for double the larger operand */
words_mult = (words_a > words_b ? words_a : words_b);
words_x = words_mult * 2;
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, words_x ) );
/* TODO: check if lset here is necessary, hardware should zero */
MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
if (mul_pram_alloc(A, B, &s1, &s2, &dest, &bites)) {
goto cleanup;
}
esp_mpi_acquire_hardware(); esp_mpi_acquire_hardware();
if (ets_bigint_mult_prepare((uint32_t *)s1, (uint32_t *)s2, bites)){
ets_bigint_wait_finish(); if(words_mult * 32 > 2048) {
if (ets_bigint_mult_getz((uint32_t *)dest, bites) == true) { printf("WARNING: %d bit operands (%d bits * %d bits) too large for hardware unit\n", words_mult * 32, mbedtls_mpi_bitlen(A), mbedtls_mpi_bitlen(B));
memcpy(X->p, dest, (i + j) * 4); }
ret = 0;
} else { if (ets_bigint_mult_prepare(A->p, B->p, words_mult * 32)) {
ets_bigint_wait_finish();
/* NB: argument to bigint_mult_getz is length of inputs, double this number (words_x) is
copied to output X->p.
*/
if (ets_bigint_mult_getz(X->p, words_mult * 32) == true) {
ret = 0;
} else {
printf("ets_bigint_mult_getz failed\n"); printf("ets_bigint_mult_getz failed\n");
} }
} else{ } else{
@@ -307,11 +289,6 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi
esp_mpi_release_hardware(); esp_mpi_release_hardware();
X->s = A->s * B->s; X->s = A->s * B->s;
free(s1);
free(s2);
free(dest);
cleanup: cleanup:
mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TA );

View File

@@ -253,8 +253,8 @@
Disabled as number of limbs limited by bug. Internal TW#7112. Disabled as number of limbs limited by bug. Internal TW#7112.
*/ */
//#define MBEDTLS_MPI_EXP_MOD_ALT #define MBEDTLS_MPI_EXP_MOD_ALT
//#define MBEDTLS_MPI_MUL_MPI_ALT #define MBEDTLS_MPI_MUL_MPI_ALT
/** /**
* \def MBEDTLS_MD2_PROCESS_ALT * \def MBEDTLS_MD2_PROCESS_ALT