mirror of
https://github.com/espressif/esp-idf.git
synced 2025-08-02 12:14:32 +02:00
hwcrypto bignum/MPI: Batch safe DPORT reads to improve performance
This commit is contained in:
committed by
Angus Gratton
parent
cb31222e8b
commit
7be002ec0f
@@ -76,16 +76,19 @@ void esp_mpi_acquire_hardware( void )
|
|||||||
/* newlib locks lazy initialize on ESP-IDF */
|
/* newlib locks lazy initialize on ESP-IDF */
|
||||||
_lock_acquire(&mpi_lock);
|
_lock_acquire(&mpi_lock);
|
||||||
|
|
||||||
DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_RSA);
|
DPORT_STALL_OTHER_CPU_START();
|
||||||
|
{
|
||||||
|
_DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_RSA);
|
||||||
/* also clear reset on digital signature, otherwise RSA is held in reset */
|
/* also clear reset on digital signature, otherwise RSA is held in reset */
|
||||||
DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG,
|
_DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG,
|
||||||
DPORT_PERI_EN_RSA
|
DPORT_PERI_EN_RSA
|
||||||
| DPORT_PERI_EN_DIGITAL_SIGNATURE);
|
| DPORT_PERI_EN_DIGITAL_SIGNATURE);
|
||||||
|
|
||||||
DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
|
_DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
|
||||||
|
}
|
||||||
|
DPORT_STALL_OTHER_CPU_END();
|
||||||
|
|
||||||
while(DPORT_REG_READ(RSA_CLEAN_REG) != 1);
|
while(DPORT_REG_READ(RSA_CLEAN_REG) != 1);
|
||||||
|
|
||||||
// Note: from enabling RSA clock to here takes about 1.3us
|
// Note: from enabling RSA clock to here takes about 1.3us
|
||||||
|
|
||||||
#ifdef CONFIG_MBEDTLS_MPI_USE_INTERRUPT
|
#ifdef CONFIG_MBEDTLS_MPI_USE_INTERRUPT
|
||||||
@@ -95,11 +98,15 @@ void esp_mpi_acquire_hardware( void )
|
|||||||
|
|
||||||
void esp_mpi_release_hardware( void )
|
void esp_mpi_release_hardware( void )
|
||||||
{
|
{
|
||||||
DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
|
DPORT_STALL_OTHER_CPU_START();
|
||||||
|
{
|
||||||
|
_DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD);
|
||||||
|
|
||||||
/* don't reset digital signature unit, as this resets AES also */
|
/* don't reset digital signature unit, as this resets AES also */
|
||||||
DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_RSA);
|
_DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_RSA);
|
||||||
DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_RSA);
|
_DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_RSA);
|
||||||
|
}
|
||||||
|
DPORT_STALL_OTHER_CPU_END();
|
||||||
|
|
||||||
_lock_release(&mpi_lock);
|
_lock_release(&mpi_lock);
|
||||||
}
|
}
|
||||||
@@ -139,6 +146,9 @@ static inline size_t bits_to_hardware_words(size_t num_bits)
|
|||||||
|
|
||||||
If num_words is higher than the number of words in the bignum then
|
If num_words is higher than the number of words in the bignum then
|
||||||
these additional words will be zeroed in the memory buffer.
|
these additional words will be zeroed in the memory buffer.
|
||||||
|
|
||||||
|
As this function only writes to DPORT memory, no DPORT_STALL_OTHER_CPU_START()
|
||||||
|
is required.
|
||||||
*/
|
*/
|
||||||
static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, size_t num_words)
|
static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, size_t num_words)
|
||||||
{
|
{
|
||||||
@@ -146,10 +156,14 @@ static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, s
|
|||||||
uint32_t copy_words = num_words < mpi->n ? num_words : mpi->n;
|
uint32_t copy_words = num_words < mpi->n ? num_words : mpi->n;
|
||||||
|
|
||||||
/* Copy MPI data to memory block registers */
|
/* Copy MPI data to memory block registers */
|
||||||
memcpy(pbase, mpi->p, copy_words * 4);
|
for (int i = 0; i < copy_words; i++) {
|
||||||
|
pbase[i] = mpi->p[i];
|
||||||
|
}
|
||||||
|
|
||||||
/* Zero any remaining memory block data */
|
/* Zero any remaining memory block data */
|
||||||
bzero(pbase + copy_words, (num_words - copy_words) * 4);
|
for (int i = copy_words; i < num_words; i++) {
|
||||||
|
pbase[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Note: not executing memw here, can do it before we start a bignum operation */
|
/* Note: not executing memw here, can do it before we start a bignum operation */
|
||||||
}
|
}
|
||||||
@@ -159,6 +173,8 @@ static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, s
|
|||||||
Reads num_words words from block.
|
Reads num_words words from block.
|
||||||
|
|
||||||
Can return a failure result if fails to grow the MPI result.
|
Can return a failure result if fails to grow the MPI result.
|
||||||
|
|
||||||
|
Cannot be called inside DPORT_STALL_OTHER_CPU_START() (as may allocate memory).
|
||||||
*/
|
*/
|
||||||
static inline int mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, int num_words)
|
static inline int mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, int num_words)
|
||||||
{
|
{
|
||||||
@@ -167,9 +183,13 @@ static inline int mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, int num_wo
|
|||||||
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(x, num_words) );
|
MBEDTLS_MPI_CHK( mbedtls_mpi_grow(x, num_words) );
|
||||||
|
|
||||||
/* Copy data from memory block registers */
|
/* Copy data from memory block registers */
|
||||||
|
DPORT_STALL_OTHER_CPU_START();
|
||||||
|
{
|
||||||
for (size_t i = 0; i < num_words; ++i) {
|
for (size_t i = 0; i < num_words; ++i) {
|
||||||
x->p[i] = DPORT_REG_READ(mem_base + i * 4);
|
x->p[i] = _DPORT_REG_READ(mem_base + i * 4);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
DPORT_STALL_OTHER_CPU_END();
|
||||||
|
|
||||||
/* Zero any remaining limbs in the bignum, if the buffer is bigger
|
/* Zero any remaining limbs in the bignum, if the buffer is bigger
|
||||||
than num_words */
|
than num_words */
|
||||||
@@ -238,10 +258,13 @@ static int calculate_rinv(mbedtls_mpi *Rinv, const mbedtls_mpi *M, int num_words
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Execute RSA operation. op_reg specifies which 'START' register
|
/* Begin an RSA operation. op_reg specifies which 'START' register
|
||||||
to write to.
|
to write to.
|
||||||
|
|
||||||
|
Because the only DPORT operations here are writes,
|
||||||
|
does not need protecting via DPORT_STALL_OTHER_CPU_START();
|
||||||
*/
|
*/
|
||||||
static inline void execute_op(uint32_t op_reg)
|
static inline void start_op(uint32_t op_reg)
|
||||||
{
|
{
|
||||||
/* Clear interrupt status */
|
/* Clear interrupt status */
|
||||||
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
|
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
|
||||||
@@ -250,7 +273,15 @@ static inline void execute_op(uint32_t op_reg)
|
|||||||
to the memory blocks are also complete. */
|
to the memory blocks are also complete. */
|
||||||
|
|
||||||
DPORT_REG_WRITE(op_reg, 1);
|
DPORT_REG_WRITE(op_reg, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for an RSA operation to complete.
|
||||||
|
|
||||||
|
This should NOT be called inside a DPORT_STALL_OTHER_CPU_START(), as it will stall the other CPU for an unacceptably long
|
||||||
|
period (and - depending on config - may require interrupts enabled).
|
||||||
|
*/
|
||||||
|
static inline void wait_op_complete(uint32_t op_reg)
|
||||||
|
{
|
||||||
#ifdef CONFIG_MBEDTLS_MPI_USE_INTERRUPT
|
#ifdef CONFIG_MBEDTLS_MPI_USE_INTERRUPT
|
||||||
if (!xSemaphoreTake(op_complete_sem, 2000 / portTICK_PERIOD_MS)) {
|
if (!xSemaphoreTake(op_complete_sem, 2000 / portTICK_PERIOD_MS)) {
|
||||||
ESP_LOGE(TAG, "Timed out waiting for RSA operation (op_reg 0x%x int_reg 0x%x)",
|
ESP_LOGE(TAG, "Timed out waiting for RSA operation (op_reg 0x%x int_reg 0x%x)",
|
||||||
@@ -258,12 +289,13 @@ static inline void execute_op(uint32_t op_reg)
|
|||||||
abort(); /* indicates a fundamental problem with driver */
|
abort(); /* indicates a fundamental problem with driver */
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
while(REG_READ(RSA_INTERRUPT_REG) != 1)
|
while(DPORT_REG_READ(RSA_INTERRUPT_REG) != 1)
|
||||||
{ }
|
{ }
|
||||||
#endif
|
|
||||||
|
|
||||||
/* clear the interrupt */
|
/* clear the interrupt */
|
||||||
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
|
DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1);
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sub-stages of modulo multiplication/exponentiation operations */
|
/* Sub-stages of modulo multiplication/exponentiation operations */
|
||||||
@@ -287,6 +319,8 @@ int esp_mpi_mul_mpi_mod(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
|
|||||||
|
|
||||||
esp_mpi_acquire_hardware();
|
esp_mpi_acquire_hardware();
|
||||||
|
|
||||||
|
/* (As the following are all writes to DPORT memory, no DPORT_STALL_OTHER_CPU_START is required.) */
|
||||||
|
|
||||||
/* Load M, X, Rinv, Mprime (Mprime is mod 2^32) */
|
/* Load M, X, Rinv, Mprime (Mprime is mod 2^32) */
|
||||||
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, num_words);
|
mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, num_words);
|
||||||
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
|
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
|
||||||
@@ -297,10 +331,12 @@ int esp_mpi_mul_mpi_mod(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
|
|||||||
DPORT_REG_WRITE(RSA_MULT_MODE_REG, (num_words / 16) - 1);
|
DPORT_REG_WRITE(RSA_MULT_MODE_REG, (num_words / 16) - 1);
|
||||||
|
|
||||||
/* Execute first stage montgomery multiplication */
|
/* Execute first stage montgomery multiplication */
|
||||||
execute_op(RSA_MULT_START_REG);
|
start_op(RSA_MULT_START_REG);
|
||||||
|
|
||||||
|
wait_op_complete(RSA_MULT_START_REG);
|
||||||
|
|
||||||
/* execute second stage */
|
/* execute second stage */
|
||||||
MBEDTLS_MPI_CHK( modular_multiply_finish(Z, X, Y, num_words) );
|
ret = modular_multiply_finish(Z, X, Y, num_words);
|
||||||
|
|
||||||
esp_mpi_release_hardware();
|
esp_mpi_release_hardware();
|
||||||
|
|
||||||
@@ -366,6 +402,8 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi
|
|||||||
|
|
||||||
esp_mpi_acquire_hardware();
|
esp_mpi_acquire_hardware();
|
||||||
|
|
||||||
|
/* (As the following are all writes to DPORT memory, no DPORT_STALL_OTHER_CPU_START is required.) */
|
||||||
|
|
||||||
/* "mode" register loaded with number of 512-bit blocks, minus 1 */
|
/* "mode" register loaded with number of 512-bit blocks, minus 1 */
|
||||||
DPORT_REG_WRITE(RSA_MODEXP_MODE_REG, (num_words / 16) - 1);
|
DPORT_REG_WRITE(RSA_MODEXP_MODE_REG, (num_words / 16) - 1);
|
||||||
|
|
||||||
@@ -376,10 +414,11 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi
|
|||||||
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, num_words);
|
mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, num_words);
|
||||||
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
|
DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
|
||||||
|
|
||||||
execute_op(RSA_START_MODEXP_REG);
|
start_op(RSA_START_MODEXP_REG);
|
||||||
|
|
||||||
|
wait_op_complete(RSA_START_MODEXP_REG);
|
||||||
|
|
||||||
ret = mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, num_words);
|
ret = mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, num_words);
|
||||||
|
|
||||||
esp_mpi_release_hardware();
|
esp_mpi_release_hardware();
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
@@ -407,11 +446,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi* Z, const mbedtls_mpi* X, const mbedtls_mpi
|
|||||||
*/
|
*/
|
||||||
static int modular_multiply_finish(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
|
static int modular_multiply_finish(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret = 0;
|
||||||
|
|
||||||
/* Load Y to X input memory block, rerun */
|
/* Load Y to X input memory block, rerun */
|
||||||
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, num_words);
|
mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, Y, num_words);
|
||||||
|
|
||||||
execute_op(RSA_MULT_START_REG);
|
start_op(RSA_MULT_START_REG);
|
||||||
|
|
||||||
|
wait_op_complete(RSA_MULT_START_REG);
|
||||||
|
|
||||||
/* Read result into Z */
|
/* Read result into Z */
|
||||||
ret = mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, num_words);
|
ret = mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, num_words);
|
||||||
@@ -429,7 +471,7 @@ static int mpi_mult_mpi_overlong(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbe
|
|||||||
/* Z = X * Y */
|
/* Z = X * Y */
|
||||||
int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y )
|
int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y )
|
||||||
{
|
{
|
||||||
int ret;
|
int ret = 0;
|
||||||
size_t bits_x, bits_y, words_x, words_y, words_mult, words_z;
|
size_t bits_x, bits_y, words_x, words_y, words_mult, words_z;
|
||||||
|
|
||||||
/* Count words needed for X & Y in hardware */
|
/* Count words needed for X & Y in hardware */
|
||||||
@@ -511,7 +553,9 @@ int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi
|
|||||||
*/
|
*/
|
||||||
DPORT_REG_WRITE(RSA_MULT_MODE_REG, (words_z / 16) + 7);
|
DPORT_REG_WRITE(RSA_MULT_MODE_REG, (words_z / 16) + 7);
|
||||||
|
|
||||||
execute_op(RSA_MULT_START_REG);
|
start_op(RSA_MULT_START_REG);
|
||||||
|
|
||||||
|
wait_op_complete(RSA_MULT_START_REG);
|
||||||
|
|
||||||
/* Read back the result */
|
/* Read back the result */
|
||||||
ret = mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, words_z);
|
ret = mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, words_z);
|
||||||
@@ -566,14 +610,15 @@ static int mpi_mult_mpi_failover_mod_mult(mbedtls_mpi *Z, const mbedtls_mpi *X,
|
|||||||
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE + i * 4, 0);
|
DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE + i * 4, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
execute_op(RSA_MULT_START_REG);
|
start_op(RSA_MULT_START_REG);
|
||||||
|
|
||||||
|
wait_op_complete(RSA_MULT_START_REG);
|
||||||
|
|
||||||
/* finish the modular multiplication */
|
/* finish the modular multiplication */
|
||||||
MBEDTLS_MPI_CHK( modular_multiply_finish(Z, X, Y, num_words) );
|
ret = modular_multiply_finish(Z, X, Y, num_words);
|
||||||
|
|
||||||
esp_mpi_release_hardware();
|
esp_mpi_release_hardware();
|
||||||
|
|
||||||
cleanup:
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -596,7 +641,7 @@ static int mpi_mult_mpi_failover_mod_mult(mbedtls_mpi *Z, const mbedtls_mpi *X,
|
|||||||
*/
|
*/
|
||||||
static int mpi_mult_mpi_overlong(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t bits_y, size_t words_result)
|
static int mpi_mult_mpi_overlong(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t bits_y, size_t words_result)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret = 0;
|
||||||
mbedtls_mpi Ztemp;
|
mbedtls_mpi Ztemp;
|
||||||
const size_t limbs_y = (bits_y + biL - 1) / biL;
|
const size_t limbs_y = (bits_y + biL - 1) / biL;
|
||||||
/* Rather than slicing in two on bits we slice on limbs (32 bit words) */
|
/* Rather than slicing in two on bits we slice on limbs (32 bit words) */
|
||||||
|
Reference in New Issue
Block a user