hwcrypto bignum: Use mbedtls_mpi data structures for all bignum data

This change still doesn't solve the problem of multiplying two numbers where
one is larger than 2048 bits, which is needed for RSA support.
This commit is contained in:
Angus Gratton
2016-09-19 18:00:03 +10:00
parent 58aec93dbb
commit 1a6dd44d03
2 changed files with 58 additions and 81 deletions

View File

@@ -207,96 +207,78 @@ static int mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N, mbedtls_mpi_uint m
return( mpi_montmul( A, &U, N, mm, T ) ); return( mpi_montmul( A, &U, N, mm, T ) );
} }
#if defined(MBEDTLS_MPI_MUL_MPI_ALT) /* MBEDTLS_MPI_MUL_MPI_ALT */
/* Allocate parameters used by hardware MPI multiply, /* Number of words used to hold 'mpi', rounded up to nearest
and copy mbedtls_mpi structures into them */ 16 words (512 bits) to match hardware support
static int mul_pram_alloc(const mbedtls_mpi *A, const mbedtls_mpi *B, char **pA, char **pB, char **pX, size_t *bites) */
static inline size_t hardware_words_needed(const mbedtls_mpi *mpi)
{ {
char *sa, *sb, *sx; size_t res;
// int algn; for(res = mpi->n; res > 0; res-- ) {
int words, bytes; if( mpi->p[res - 1] != 0 )
int abytes, bbytes; break;
if (A->n > B->n)
words = A->n;
else
words = B->n;
bytes = (words / 16 + ((words % 16) ? 1 : 0 )) * 16 * 4 * 2;
abytes = A->n * 4;
bbytes = B->n * 4;
sa = malloc(bytes);
if (!sa) {
return -1;
} }
res = (res + 0xF) & ~0xF;
sb = malloc(bytes); return res;
if (!sb) {
free(sa);
return -1;
} }
sx = malloc(bytes);
if (!sx) {
free(sa);
free(sb);
return -1;
}
memcpy(sa, A->p, abytes);
memset(sa + abytes, 0, bytes - abytes);
memcpy(sb, B->p, bbytes);
memset(sb + bbytes, 0, bytes - bbytes);
*pA = sa;
*pB = sb;
*pX = sx;
*bites = bytes * 4;
return 0;
}
#if defined(MBEDTLS_MPI_MUL_MPI_ALT)
int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B ) int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
{ {
int ret = -1; int ret = -1;
size_t i, j; size_t words_a, words_b, words_x, words_mult;
char *s1 = NULL, *s2 = NULL, *dest = NULL;
size_t bites;
mbedtls_mpi TA, TB; mbedtls_mpi TA, TB;
mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB ); mbedtls_mpi_init( &TA ); mbedtls_mpi_init( &TB );
if( X == A ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) ); A = &TA; } /* Count words needed for A & B in hardware */
if( X == B ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) ); B = &TB; } words_a = hardware_words_needed(A);
words_b = hardware_words_needed(B);
for( i = A->n; i > 0; i-- ) /* Take a copy of A if either X == A OR if A isn't long enough
if( A->p[i - 1] != 0 ) to hold the number of words needed for hardware.
break;
for( j = B->n; j > 0; j-- ) (can't grow A directly as it is const)
if( B->p[j - 1] != 0 )
break;
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) ); TODO: growing the input operands is only necessary because the
MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); ROM functions only take one length argument. It should be
possible for us to just copy the used data only into the
if (mul_pram_alloc(A, B, &s1, &s2, &dest, &bites)) { hardware buffers, and set the remaining bits to zero - saving
goto cleanup; RAM. But we need to reimplement ets_bigint_mult_prepare() in
software for this.
*/
if( X == A || A->n < words_a) {
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &TA, words_a) );
A = &TA;
}
/* Same for B */
if( X == B || B->n < words_b ) {
MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &TB, words_b) );
B = &TB;
} }
/* Result X has to have room for double the larger operand */
words_mult = (words_a > words_b ? words_a : words_b);
words_x = words_mult * 2;
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, words_x ) );
/* TODO: check if lset here is necessary, hardware should zero */
MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
esp_mpi_acquire_hardware(); esp_mpi_acquire_hardware();
if (ets_bigint_mult_prepare((uint32_t *)s1, (uint32_t *)s2, bites)){
if(words_mult * 32 > 2048) {
printf("WARNING: %d bit operands (%d bits * %d bits) too large for hardware unit\n", words_mult * 32, mbedtls_mpi_bitlen(A), mbedtls_mpi_bitlen(B));
}
if (ets_bigint_mult_prepare(A->p, B->p, words_mult * 32)) {
ets_bigint_wait_finish(); ets_bigint_wait_finish();
if (ets_bigint_mult_getz((uint32_t *)dest, bites) == true) { /* NB: argument to bigint_mult_getz is length of inputs, double this number (words_x) is
memcpy(X->p, dest, (i + j) * 4); copied to output X->p.
*/
if (ets_bigint_mult_getz(X->p, words_mult * 32) == true) {
ret = 0; ret = 0;
} else { } else {
printf("ets_bigint_mult_getz failed\n"); printf("ets_bigint_mult_getz failed\n");
@@ -307,11 +289,6 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi
esp_mpi_release_hardware(); esp_mpi_release_hardware();
X->s = A->s * B->s; X->s = A->s * B->s;
free(s1);
free(s2);
free(dest);
cleanup: cleanup:
mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TA );

View File

@@ -253,8 +253,8 @@
Disabled as number of limbs limited by bug. Internal TW#7112. Disabled as number of limbs limited by bug. Internal TW#7112.
*/ */
//#define MBEDTLS_MPI_EXP_MOD_ALT #define MBEDTLS_MPI_EXP_MOD_ALT
//#define MBEDTLS_MPI_MUL_MPI_ALT #define MBEDTLS_MPI_MUL_MPI_ALT
/** /**
* \def MBEDTLS_MD2_PROCESS_ALT * \def MBEDTLS_MD2_PROCESS_ALT