From 2abb656ba2d14720ca7c22c636244cd9351b4643 Mon Sep 17 00:00:00 2001 From: "harshal.patil" Date: Tue, 12 Dec 2023 21:12:41 +0530 Subject: [PATCH] feat(mbedtls/aes): Support AES-DMA operations by satisfying L1 cache alignment requirements - Use DMA RX done interrupt status bit while waiting for DMA rx transfer --- components/hal/include/hal/dma_types.h | 5 - components/mbedtls/CMakeLists.txt | 2 + components/mbedtls/port/aes/dma/esp_aes.c | 618 ----------- .../mbedtls/port/aes/dma/esp_aes_dma_core.c | 992 ++++++++++++++++++ .../mbedtls/port/aes/dma/esp_aes_gdma_impl.c | 6 +- components/mbedtls/port/aes/esp_aes_gcm.c | 9 +- .../port/aes/include/esp_aes_internal.h | 16 +- .../esp_crypto_shared_gdma.c | 41 +- .../port/include/esp_crypto_shared_gdma.h | 11 +- 9 files changed, 1048 insertions(+), 652 deletions(-) create mode 100644 components/mbedtls/port/aes/dma/esp_aes_dma_core.c diff --git a/components/hal/include/hal/dma_types.h b/components/hal/include/hal/dma_types.h index 52e6c542c7..435ac5cd96 100644 --- a/components/hal/include/hal/dma_types.h +++ b/components/hal/include/hal/dma_types.h @@ -59,11 +59,6 @@ ESP_STATIC_ASSERT(sizeof(dma_descriptor_align8_t) == 16, "dma_descriptor_align8_ #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED (4095-3) /*!< Maximum size of the buffer that can be attached to descriptor, and aligned to 4B */ #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED (4095-15) /*!< Maximum size of the buffer that can be attached to descriptor, and aligned to 16B */ -// the size field has 12 bits, but 0 not for 4096. -// to avoid possible problem when the size is not word-aligned, we only use 4096-4 per desc. -/** Maximum size of data in the buffer that a DMA descriptor can hold. */ -#define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC (4096-4) - /** * Get the number of DMA descriptors required for a given buffer size. * diff --git a/components/mbedtls/CMakeLists.txt b/components/mbedtls/CMakeLists.txt index 9a2a3d31dd..e92ac5d0a4 100644 --- a/components/mbedtls/CMakeLists.txt +++ b/components/mbedtls/CMakeLists.txt @@ -191,6 +191,8 @@ if(AES_PERIPHERAL_TYPE STREQUAL "dma") set(AES_DMA_SRCS "${COMPONENT_DIR}/port/aes/dma/esp_aes_gdma_impl.c") endif() + list(APPEND AES_DMA_SRCS "${COMPONENT_DIR}/port/aes/dma/esp_aes_dma_core.c") + target_include_directories(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/aes/dma/include") target_sources(mbedcrypto PRIVATE "${AES_DMA_SRCS}") endif() diff --git a/components/mbedtls/port/aes/dma/esp_aes.c b/components/mbedtls/port/aes/dma/esp_aes.c index 4be296b826..d10c07c5e2 100644 --- a/components/mbedtls/port/aes/dma/esp_aes.c +++ b/components/mbedtls/port/aes/dma/esp_aes.c @@ -28,27 +28,11 @@ #include #include "mbedtls/aes.h" -#include "mbedtls/platform_util.h" -#include "esp_intr_alloc.h" #include "esp_private/periph_ctrl.h" #include "esp_log.h" -#include "esp_attr.h" -#include "esp_crypto_dma.h" -#include "esp_heap_caps.h" -#include "esp_memory_utils.h" -#include "esp_cache.h" -#include "sys/param.h" -#if CONFIG_PM_ENABLE -#include "esp_pm.h" -#endif #include "esp_crypto_lock.h" #include "hal/aes_hal.h" -#include "esp_aes_dma_priv.h" #include "esp_aes_internal.h" -#include "esp_private/esp_cache_private.h" - -#include "freertos/FreeRTOS.h" -#include "freertos/semphr.h" #if SOC_AES_GDMA #define AES_LOCK() esp_crypto_sha_aes_lock_acquire() @@ -58,123 +42,7 @@ #define AES_RELEASE() esp_crypto_dma_lock_release() #endif -/* Max size of each chunk to process when output buffer is in unaligned external ram - must be a multiple of block size -*/ -#define AES_MAX_CHUNK_WRITE_SIZE 1600 - -/* Input over this length will yield and wait for interrupt instead of - busy-waiting, 30000 bytes is approx 0.5 ms */ -#define AES_DMA_INTR_TRIG_LEN 2000 - -/* With buffers in PSRAM (worst condition) we still achieve a speed of 4 MB/s - thus a 2 second timeout value should be suffient for even very large buffers. - */ -#define AES_WAIT_INTR_TIMEOUT_MS 2000 - -#if defined(CONFIG_MBEDTLS_AES_USE_INTERRUPT) -static SemaphoreHandle_t op_complete_sem; -#if defined(CONFIG_PM_ENABLE) -static esp_pm_lock_handle_t s_pm_cpu_lock; -static esp_pm_lock_handle_t s_pm_sleep_lock; -#endif -#endif - -#if SOC_PSRAM_DMA_CAPABLE - -#if (CONFIG_ESP32S2_DATA_CACHE_LINE_16B || CONFIG_ESP32S3_DATA_CACHE_LINE_16B) -#define DCACHE_LINE_SIZE 16 -#elif (CONFIG_ESP32S2_DATA_CACHE_LINE_32B || CONFIG_ESP32S3_DATA_CACHE_LINE_32B) -#define DCACHE_LINE_SIZE 32 -#elif CONFIG_ESP32S3_DATA_CACHE_LINE_64B -#define DCACHE_LINE_SIZE 64 -#endif //(CONFIG_ESP32S2_DATA_CACHE_LINE_16B || CONFIG_ESP32S3_DATA_CACHE_LINE_16B) - -#endif //SOC_PSRAM_DMA_CAPABLE - static const char *TAG = "esp-aes"; -static bool s_check_dma_capable(const void *p); - -/* These are static due to: - * * Must be in DMA capable memory, so stack is not a safe place to put them - * * To avoid having to malloc/free them for every DMA operation - */ -static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc; -static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc; -static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; -static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; - -/** Append a descriptor to the chain, set head if chain empty - * - * @param[out] head Pointer to the first/head node of the DMA descriptor linked list - * @param item Pointer to the DMA descriptor node that has to be appended - */ -static inline void dma_desc_append(crypto_dma_desc_t **head, crypto_dma_desc_t *item) -{ - crypto_dma_desc_t *it; - if (*head == NULL) { - *head = item; - return; - } - - it = *head; - - while (it->next != 0) { - it = (crypto_dma_desc_t *)it->next; - } - it->dw0.suc_eof = 0; - it->next = item; -} - -/** - * Generate a linked list pointing to a (huge) buffer in an descriptor array. - * - * The caller should ensure there is enough size to hold the array, by calling - * `dma_desc_get_required_num` with the same or less than the max_desc_size argument. - * - * @param[out] dmadesc Output of a descriptor array, the head should be fed to the DMA. - * @param data Buffer for the descriptors to point to. - * @param len Size (or length for TX) of the buffer - * @param max_desc_size Maximum length of each descriptor - * @param isrx The RX DMA may require the buffer to be word-aligned, set to true for a RX link, otherwise false. - */ -static inline void dma_desc_setup_link(crypto_dma_desc_t* dmadesc, const void *data, int len, int max_desc_size, bool isrx) -{ - int i = 0; - while (len) { - int dmachunklen = len; - if (dmachunklen > max_desc_size) { - dmachunklen = max_desc_size; - } - if (isrx) { - //Receive needs DMA length rounded to next 32-bit boundary - dmadesc[i].dw0.size = (dmachunklen + 3) & (~3); - dmadesc[i].dw0.length = (dmachunklen + 3) & (~3); - } else { - dmadesc[i].dw0.size = dmachunklen; - dmadesc[i].dw0.length = dmachunklen; - } - dmadesc[i].buffer = (void *)data; - dmadesc[i].dw0.suc_eof = 0; - dmadesc[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA; - dmadesc[i].next = &dmadesc[i + 1]; - len -= dmachunklen; - data += dmachunklen; - i++; - } - dmadesc[i - 1].dw0.suc_eof = 1; //Mark last DMA desc as end of stream. - dmadesc[i - 1].next = NULL; -} - -static inline void esp_aes_wait_dma_done(crypto_dma_desc_t *output) -{ - /* Wait for DMA write operation to complete */ - while (1) { - if ( esp_aes_dma_done(output) ) { - break; - } - } -} void esp_aes_acquire_hardware( void ) { @@ -202,481 +70,6 @@ void esp_aes_release_hardware( void ) AES_RELEASE(); } - -#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) -static IRAM_ATTR void esp_aes_complete_isr(void *arg) -{ - BaseType_t higher_woken; - aes_hal_interrupt_clear(); - xSemaphoreGiveFromISR(op_complete_sem, &higher_woken); - if (higher_woken) { - portYIELD_FROM_ISR(); - } -} - -void esp_aes_intr_alloc(void) -{ - if (op_complete_sem == NULL) { - const int isr_flags = esp_intr_level_to_flags(CONFIG_MBEDTLS_AES_INTERRUPT_LEVEL); - - esp_err_t ret = esp_intr_alloc(ETS_AES_INTR_SOURCE, isr_flags, esp_aes_complete_isr, NULL, NULL); - if (ret != ESP_OK) { - ESP_LOGE(TAG, "Failed to allocate AES interrupt %d", ret); - // This should be treated as fatal error as this API would mostly - // be invoked within mbedTLS interface. There is no way for the system - // to proceed if the AES interrupt allocation fails here. - abort(); - } - - static StaticSemaphore_t op_sem_buf; - op_complete_sem = xSemaphoreCreateBinaryStatic(&op_sem_buf); - // Static semaphore creation is unlikley to fail but still basic sanity - assert(op_complete_sem != NULL); - } -} - -static esp_err_t esp_aes_isr_initialise( void ) -{ - aes_hal_interrupt_clear(); - aes_hal_interrupt_enable(true); - - /* AES is clocked proportionally to CPU clock, take power management lock */ -#ifdef CONFIG_PM_ENABLE - if (s_pm_cpu_lock == NULL) { - if (esp_pm_lock_create(ESP_PM_NO_LIGHT_SLEEP, 0, "aes_sleep", &s_pm_sleep_lock) != ESP_OK) { - ESP_LOGE(TAG, "Failed to create PM sleep lock"); - return ESP_FAIL; - } - if (esp_pm_lock_create(ESP_PM_CPU_FREQ_MAX, 0, "aes_cpu", &s_pm_cpu_lock) != ESP_OK) { - ESP_LOGE(TAG, "Failed to create PM CPU lock"); - return ESP_FAIL; - } - } - esp_pm_lock_acquire(s_pm_cpu_lock); - esp_pm_lock_acquire(s_pm_sleep_lock); -#endif - - return ESP_OK; -} -#endif // CONFIG_MBEDTLS_AES_USE_INTERRUPT - -/* Wait for AES hardware block operation to complete */ -static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t *output_desc_head, crypto_dma_desc_t *output_desc_tail) -{ -#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) - if (use_intr) { - if (!xSemaphoreTake(op_complete_sem, AES_WAIT_INTR_TIMEOUT_MS / portTICK_PERIOD_MS)) { - /* indicates a fundamental problem with driver */ - ESP_LOGE(TAG, "Timed out waiting for completion of AES Interrupt"); - return -1; - } -#ifdef CONFIG_PM_ENABLE - esp_pm_lock_release(s_pm_cpu_lock); - esp_pm_lock_release(s_pm_sleep_lock); -#endif // CONFIG_PM_ENABLE - } -#endif - /* Checking this if interrupt is used also, to avoid - issues with AES fault injection - */ - aes_hal_wait_done(); - -#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE - const crypto_dma_desc_t *it = output_desc_head; - while(it != NULL) { - esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - it = (const crypto_dma_desc_t*) it->next; - }; -#endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ - - esp_aes_wait_dma_done(output_desc_tail); - return 0; -} - - -static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out); - - -/* Output buffers in external ram needs to be 16-byte aligned and DMA cant access input in the iCache mem range, - reallocate them into internal memory and encrypt in chunks to avoid - having to malloc too big of a buffer - - The function esp_aes_process_dma_ext_ram zeroises the output buffer in the case of memory allocation failure. -*/ - -static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out, bool realloc_input, bool realloc_output) -{ - size_t chunk_len; - int ret = 0; - int offset = 0; - unsigned char *input_buf = NULL; - unsigned char *output_buf = NULL; - const unsigned char *dma_input; - chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len); - - if (realloc_input) { - input_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA); - - if (input_buf == NULL) { - mbedtls_platform_zeroize(output, len); - ESP_LOGE(TAG, "Failed to allocate memory"); - return -1; - } - } - - if (realloc_output) { - output_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA); - - if (output_buf == NULL) { - mbedtls_platform_zeroize(output, len); - ESP_LOGE(TAG, "Failed to allocate memory"); - return -1; - } - } else { - output_buf = output; - } - - while (len) { - chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len); - - /* If input needs realloc then copy it, else use the input with offset*/ - if (realloc_input) { - memcpy(input_buf, input + offset, chunk_len); - dma_input = input_buf; - } else { - dma_input = input + offset; - } - - if (esp_aes_process_dma(ctx, dma_input, output_buf, chunk_len, stream_out) != 0) { - ret = -1; - goto cleanup; - } - - if (realloc_output) { - memcpy(output + offset, output_buf, chunk_len); - } else { - output_buf = output + offset + chunk_len; - } - - len -= chunk_len; - offset += chunk_len; - } - -cleanup: - - if (realloc_input) { - free(input_buf); - } - if (realloc_output) { - free(output_buf); - } - - return ret; -} - -/* Encrypt/decrypt the input using DMA - * The function esp_aes_process_dma zeroises the output buffer in the case of following conditions: - * 1. If key is not written in the hardware - * 2. Memory allocation failures - * 3. If AES interrupt is enabled and ISR initialisation fails - * 4. Failure in any of the AES operations - */ -static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out) -{ - crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL; - crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ - crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; - size_t crypto_dma_desc_num = 0; - unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block - unsigned block_bytes = len - stream_bytes; // bytes which are in a full block - unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); - bool use_intr = false; - bool input_needs_realloc = false; - bool output_needs_realloc = false; - int ret = 0; - - assert(len > 0); // caller shouldn't ever have len set to zero - assert(stream_bytes == 0 || stream_out != NULL); // stream_out can be NULL if we're processing full block(s) - - /* If no key is written to hardware yet, either the user hasn't called - mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't - know which mode to use - or a fault skipped the - key write to hardware. Treat this as a fatal error and zero the output block. - */ - if (ctx->key_in_hardware != ctx->key_bytes) { - mbedtls_platform_zeroize(output, len); - return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH; - } - - if (block_bytes > 0) { - /* Flush cache if input in external ram */ -#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE) - if (esp_ptr_external_ram(input)) { - esp_cache_msync((void *)input, len, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - } - if (esp_ptr_external_ram(output)) { - uint32_t dcache_line_size; - esp_err_t ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_PSRAM, &dcache_line_size); - if (ret != ESP_OK) { - return ret; - } - if ((((intptr_t)(output) & (dcache_line_size - 1)) != 0) || (block_bytes % dcache_line_size != 0)) { - // Non aligned ext-mem buffer - output_needs_realloc = true; - } - } -#endif - /* DMA cannot access memory in the iCache range, copy input to internal ram */ - if (!s_check_dma_capable(input)) { - input_needs_realloc = true; - } - - if (!s_check_dma_capable(output)) { - output_needs_realloc = true; - } - - /* If either input or output is unaccessible to the DMA then they need to be reallocated */ - if (input_needs_realloc || output_needs_realloc) { - return esp_aes_process_dma_ext_ram(ctx, input, output, len, stream_out, input_needs_realloc, output_needs_realloc); - } - - /* Set up dma descriptors for input and output considering the 16 byte alignment requirement for EDMA */ - crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED); - - /* Allocate both in and out descriptors to save a malloc/free per function call */ - block_desc = heap_caps_aligned_calloc(8, crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); - if (block_desc == NULL) { - mbedtls_platform_zeroize(output, len); - ESP_LOGE(TAG, "Failed to allocate memory"); - return -1; - } - - block_in_desc = block_desc; - block_out_desc = block_desc + crypto_dma_desc_num; - - dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - - //Limit max inlink descriptor length to be 16 byte aligned, require for EDMA - dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED, 0); - - /* Setup in/out start descriptors */ - dma_desc_append(&in_desc_head, block_in_desc); - dma_desc_append(&out_desc_head, block_out_desc); - - out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; - } - - /* Any leftover bytes which are appended as an additional DMA list */ - if (stream_bytes > 0) { - - memset(&s_stream_in_desc, 0, sizeof(crypto_dma_desc_t)); - memset(&s_stream_out_desc, 0, sizeof(crypto_dma_desc_t)); - - memset(s_stream_in, 0, AES_BLOCK_BYTES); - memset(s_stream_out, 0, AES_BLOCK_BYTES); - - memcpy(s_stream_in, input + block_bytes, stream_bytes); - - dma_desc_setup_link(&s_stream_in_desc, s_stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - dma_desc_setup_link(&s_stream_out_desc, s_stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - - /* Link with block descriptors */ - dma_desc_append(&in_desc_head, &s_stream_in_desc); - dma_desc_append(&out_desc_head, &s_stream_out_desc); - - out_desc_tail = &s_stream_out_desc; - } - -#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) - /* Only use interrupt for long AES operations */ - if (len > AES_DMA_INTR_TRIG_LEN) { - use_intr = true; - if (esp_aes_isr_initialise() != ESP_OK) { - ESP_LOGE(TAG, "ESP-AES ISR initialisation failed"); - ret = -1; - goto cleanup; - } - } else -#endif - { - aes_hal_interrupt_enable(false); - } - - if (esp_aes_dma_start(in_desc_head, out_desc_head) != ESP_OK) { - ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available"); - ret = -1; - goto cleanup; - } - - aes_hal_transform_dma_start(blocks); - - if (esp_aes_dma_wait_complete(use_intr, out_desc_head, out_desc_tail) < 0) { - ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); - ret = -1; - goto cleanup; - } - -#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE) - if (block_bytes > 0) { - if (esp_ptr_external_ram(output)) { - esp_cache_msync((void*)output, block_bytes, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - } - } -#endif - aes_hal_transform_dma_finish(); - - if (stream_bytes > 0) { - memcpy(output + block_bytes, s_stream_out, stream_bytes); - memcpy(stream_out, s_stream_out, AES_BLOCK_BYTES); - } - -cleanup: - if (ret != 0) { - mbedtls_platform_zeroize(output, len); - } - free(block_desc); - return ret; -} - - -#if CONFIG_MBEDTLS_HARDWARE_GCM - -/* Encrypt/decrypt with AES-GCM the input using DMA - * The function esp_aes_process_dma_gcm zeroises the output buffer in the case of following conditions: - * 1. If key is not written in the hardware - * 2. Memory allocation failures - * 3. If AES interrupt is enabled and ISR initialisation fails - * 4. Failure in any of the AES operations - */ -int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, crypto_dma_desc_t *aad_desc, size_t aad_len) -{ - crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL, *len_desc = NULL; - crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ - crypto_dma_desc_t stream_in_desc, stream_out_desc; - crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; - size_t crypto_dma_desc_num = 0; - uint32_t len_buf[4] = {}; - uint8_t stream_in[16] = {}; - uint8_t stream_out[16] = {}; - unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block - unsigned block_bytes = len - stream_bytes; // bytes which are in a full block - - unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); - - bool use_intr = false; - int ret = 0; - - /* If no key is written to hardware yet, either the user hasn't called - mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't - know which mode to use - or a fault skipped the - key write to hardware. Treat this as a fatal error and zero the output block. - */ - if (ctx->key_in_hardware != ctx->key_bytes) { - mbedtls_platform_zeroize(output, len); - return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH; - } - - /* Set up dma descriptors for input and output */ - crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC); - - /* Allocate both in and out descriptors to save a malloc/free per function call, add 1 for length descriptor */ - block_desc = heap_caps_calloc((crypto_dma_desc_num * 2) + 1, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); - if (block_desc == NULL) { - mbedtls_platform_zeroize(output, len); - ESP_LOGE(TAG, "Failed to allocate memory"); - return -1; - } - - block_in_desc = block_desc; - len_desc = block_desc + crypto_dma_desc_num; - block_out_desc = block_desc + crypto_dma_desc_num + 1; - - if (aad_desc != NULL) { - dma_desc_append(&in_desc_head, aad_desc); - } - - if (block_bytes > 0) { - dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - - dma_desc_append(&in_desc_head, block_in_desc); - dma_desc_append(&out_desc_head, block_out_desc); - - out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; - } - - /* Any leftover bytes which are appended as an additional DMA list */ - if (stream_bytes > 0) { - memcpy(stream_in, input + block_bytes, stream_bytes); - - dma_desc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - dma_desc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - - dma_desc_append(&in_desc_head, &stream_in_desc); - dma_desc_append(&out_desc_head, &stream_out_desc); - - out_desc_tail = &stream_out_desc; - } - - - len_buf[1] = __builtin_bswap32(aad_len * 8); - len_buf[3] = __builtin_bswap32(len * 8); - - len_desc->dw0.length = sizeof(len_buf); - len_desc->dw0.size = sizeof(len_buf); - len_desc->dw0.owner = 1; - len_desc->dw0.suc_eof = 1; - len_desc->buffer = (uint8_t *)len_buf; - - dma_desc_append(&in_desc_head, len_desc); - -#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) - /* Only use interrupt for long AES operations */ - if (len > AES_DMA_INTR_TRIG_LEN) { - use_intr = true; - if (esp_aes_isr_initialise() != ESP_OK) { - ESP_LOGE(TAG, "ESP-AES ISR initialisation failed"); - ret = -1; - goto cleanup; - } - } else -#endif - { - aes_hal_interrupt_enable(false); - } - - /* Start AES operation */ - if (esp_aes_dma_start(in_desc_head, out_desc_head) != ESP_OK) { - ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available"); - ret = -1; - goto cleanup; - } - - aes_hal_transform_dma_gcm_start(blocks); - - if (esp_aes_dma_wait_complete(use_intr, out_desc_head, out_desc_tail) < 0) { - ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); - ret = -1; - goto cleanup; - } - - aes_hal_transform_dma_finish(); - - if (stream_bytes > 0) { - memcpy(output + block_bytes, stream_out, stream_bytes); - } - -cleanup: - if (ret != 0) { - mbedtls_platform_zeroize(output, len); - } - free(block_desc); - return ret; -} - -#endif //CONFIG_MBEDTLS_HARDWARE_GCM - static int esp_aes_validate_input(esp_aes_context *ctx, const unsigned char *input, unsigned char *output ) { @@ -1147,14 +540,3 @@ int esp_aes_crypt_ctr(esp_aes_context *ctx, return 0; } - -static bool s_check_dma_capable(const void *p) -{ - bool is_capable = false; -#if CONFIG_SPIRAM - is_capable |= esp_ptr_dma_ext_capable(p); -#endif - is_capable |= esp_ptr_dma_capable(p); - - return is_capable; -} diff --git a/components/mbedtls/port/aes/dma/esp_aes_dma_core.c b/components/mbedtls/port/aes/dma/esp_aes_dma_core.c new file mode 100644 index 0000000000..0aaa0737e1 --- /dev/null +++ b/components/mbedtls/port/aes/dma/esp_aes_dma_core.c @@ -0,0 +1,992 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include "esp_attr.h" +#include "esp_cache.h" +#include "esp_check.h" +#include "esp_dma_utils.h" +#include "esp_err.h" +#include "esp_heap_caps.h" +#include "esp_intr_alloc.h" +#include "esp_log.h" +#include "esp_memory_utils.h" +#include "esp_private/esp_cache_private.h" +#include "esp_private/periph_ctrl.h" + +#if CONFIG_PM_ENABLE +#include "esp_pm.h" +#endif +#include "hal/aes_hal.h" + +#include "esp_aes_dma_priv.h" +#include "esp_aes_internal.h" +#include "esp_crypto_dma.h" + +#include "freertos/FreeRTOS.h" +#include "freertos/semphr.h" + +#include "mbedtls/aes.h" +#include "mbedtls/platform_util.h" + +#if SOC_AES_SUPPORT_GCM +#include "aes/esp_aes_gcm.h" +#endif + +/* Max size of each chunk to process when output buffer is in unaligned external ram + must be a multiple of block size +*/ +#define AES_MAX_CHUNK_WRITE_SIZE 1600 + +/* Input over this length will yield and wait for interrupt instead of + busy-waiting, 30000 bytes is approx 0.5 ms */ +#define AES_DMA_INTR_TRIG_LEN 2000 + +/* With buffers in PSRAM (worst condition) we still achieve a speed of 4 MB/s + thus a 2 second timeout value should be suffient for even very large buffers. + */ +#define AES_WAIT_INTR_TIMEOUT_MS 2000 + +#if defined(CONFIG_MBEDTLS_AES_USE_INTERRUPT) +static SemaphoreHandle_t op_complete_sem; +#if defined(CONFIG_PM_ENABLE) +static esp_pm_lock_handle_t s_pm_cpu_lock; +static esp_pm_lock_handle_t s_pm_sleep_lock; +#endif +#endif + +static const char *TAG = "esp-aes"; + +static bool s_check_dma_capable(const void *p) +{ + bool is_capable = false; +#if CONFIG_SPIRAM + is_capable |= esp_ptr_dma_ext_capable(p); +#endif + is_capable |= esp_ptr_dma_capable(p); + + return is_capable; +} + +#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) +static IRAM_ATTR void esp_aes_complete_isr(void *arg) +{ + BaseType_t higher_woken; + aes_hal_interrupt_clear(); + xSemaphoreGiveFromISR(op_complete_sem, &higher_woken); + if (higher_woken) { + portYIELD_FROM_ISR(); + } +} + +void esp_aes_intr_alloc(void) +{ + if (op_complete_sem == NULL) { + const int isr_flags = esp_intr_level_to_flags(CONFIG_MBEDTLS_AES_INTERRUPT_LEVEL); + + esp_err_t ret = esp_intr_alloc(ETS_AES_INTR_SOURCE, isr_flags, esp_aes_complete_isr, NULL, NULL); + if (ret != ESP_OK) { + ESP_LOGE(TAG, "Failed to allocate AES interrupt %d", ret); + // This should be treated as fatal error as this API would mostly + // be invoked within mbedTLS interface. There is no way for the system + // to proceed if the AES interrupt allocation fails here. + abort(); + } + + static StaticSemaphore_t op_sem_buf; + op_complete_sem = xSemaphoreCreateBinaryStatic(&op_sem_buf); + // Static semaphore creation is unlikley to fail but still basic sanity + assert(op_complete_sem != NULL); + } +} + +static esp_err_t esp_aes_isr_initialise( void ) +{ + aes_hal_interrupt_clear(); + aes_hal_interrupt_enable(true); + + /* AES is clocked proportionally to CPU clock, take power management lock */ +#ifdef CONFIG_PM_ENABLE + if (s_pm_cpu_lock == NULL) { + if (esp_pm_lock_create(ESP_PM_NO_LIGHT_SLEEP, 0, "aes_sleep", &s_pm_sleep_lock) != ESP_OK) { + ESP_LOGE(TAG, "Failed to create PM sleep lock"); + return ESP_FAIL; + } + if (esp_pm_lock_create(ESP_PM_CPU_FREQ_MAX, 0, "aes_cpu", &s_pm_cpu_lock) != ESP_OK) { + ESP_LOGE(TAG, "Failed to create PM CPU lock"); + return ESP_FAIL; + } + } + esp_pm_lock_acquire(s_pm_cpu_lock); + esp_pm_lock_acquire(s_pm_sleep_lock); +#endif + + return ESP_OK; +} +#endif // CONFIG_MBEDTLS_AES_USE_INTERRUPT + +static inline void esp_aes_wait_dma_done(crypto_dma_desc_t *output) +{ + /* Wait for DMA write operation to complete */ + while (1) { + if ( esp_aes_dma_done(output) ) { + break; + } + } +} + +/* Wait for AES hardware block operation to complete */ +static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t *output_desc_tail) +{ +#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) + if (use_intr) { + if (!xSemaphoreTake(op_complete_sem, AES_WAIT_INTR_TIMEOUT_MS / portTICK_PERIOD_MS)) { + /* indicates a fundamental problem with driver */ + ESP_LOGE(TAG, "Timed out waiting for completion of AES Interrupt"); + return -1; + } +#ifdef CONFIG_PM_ENABLE + esp_pm_lock_release(s_pm_cpu_lock); + esp_pm_lock_release(s_pm_sleep_lock); +#endif // CONFIG_PM_ENABLE + } +#endif + /* Checking this if interrupt is used also, to avoid + issues with AES fault injection + */ + aes_hal_wait_done(); + + esp_aes_wait_dma_done(output_desc_tail); + return 0; +} + + +/* Output buffers in external ram needs to be 16-byte aligned and DMA cant access input in the iCache mem range, + reallocate them into internal memory and encrypt in chunks to avoid + having to malloc too big of a buffer + + The function esp_aes_process_dma_ext_ram zeroises the output buffer in the case of memory allocation failure. +*/ + +static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out, bool realloc_input, bool realloc_output) +{ + size_t chunk_len; + int ret = 0; + int offset = 0; + unsigned char *input_buf = NULL; + unsigned char *output_buf = NULL; + const unsigned char *dma_input; + chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len); + + if (realloc_input) { + input_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA); + + if (input_buf == NULL) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Failed to allocate memory"); + return -1; + } + } + + if (realloc_output) { + output_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA); + + if (output_buf == NULL) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Failed to allocate memory"); + return -1; + } + } else { + output_buf = output; + } + + while (len) { + chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len); + + /* If input needs realloc then copy it, else use the input with offset*/ + if (realloc_input) { + memcpy(input_buf, input + offset, chunk_len); + dma_input = input_buf; + } else { + dma_input = input + offset; + } + + if (esp_aes_process_dma(ctx, dma_input, output_buf, chunk_len, stream_out) != 0) { + ret = -1; + goto cleanup; + } + + if (realloc_output) { + memcpy(output + offset, output_buf, chunk_len); + } else { + output_buf = output + offset + chunk_len; + } + + len -= chunk_len; + offset += chunk_len; + } + +cleanup: + + if (realloc_input) { + free(input_buf); + } + if (realloc_output) { + free(output_buf); + } + + return ret; +} + +#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + +#define ALIGN_UP(num, align) (((num) + ((align) - 1)) & ~((align) - 1)) +#define ALIGN_DOWN(num, align) ((num) & ~((align) - 1)) +#define AES_DMA_ALLOC_CAPS (MALLOC_CAP_DMA | MALLOC_CAP_8BIT) + +static inline void *aes_dma_calloc(size_t num, size_t size, uint32_t caps, size_t *actual_size) +{ + void *ptr = NULL; + esp_dma_calloc(num, size, caps, &ptr, actual_size); + return ptr; +} + +static inline size_t get_cache_line_size(const void *addr) +{ + esp_err_t ret = ESP_FAIL; + size_t cache_line_size = 0; + +#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE) + if (esp_ptr_external_ram(addr)) { + ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_PSRAM, &cache_line_size); + } else +#endif + { + ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_DMA, &cache_line_size); + } + + if (ret != ESP_OK) { + return 0; + } + + return cache_line_size; +} + +static inline esp_err_t dma_desc_link(crypto_dma_desc_t *dmadesc, size_t crypto_dma_desc_num, size_t cache_line_size) +{ + esp_err_t ret = ESP_OK; + for (int i = 0; i < crypto_dma_desc_num; i++) { + dmadesc[i].dw0.suc_eof = ((i == crypto_dma_desc_num - 1) ? 1 : 0); + dmadesc[i].next = ((i == crypto_dma_desc_num - 1) ? NULL : &dmadesc[i+1]); +#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + /* Write back both input buffers and output buffers to clear any cache dirty bit if set */ + ret = esp_cache_msync(dmadesc[i].buffer, dmadesc[i].dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); + if (ret != ESP_OK) { + return ret; + } + } + ret = esp_cache_msync(dmadesc, ALIGN_UP(crypto_dma_desc_num * sizeof(crypto_dma_desc_t), cache_line_size), ESP_CACHE_MSYNC_FLAG_DIR_C2M); +#else + } +#endif + return ret; +} + +static inline void dma_desc_populate(crypto_dma_desc_t *dmadesc, const uint8_t *data, size_t len, int max_desc_size, size_t index) +{ + int dmachunklen = 0; + + while (len) { + dmachunklen = len; + if (dmachunklen > max_desc_size) { + dmachunklen = max_desc_size; + } + dmadesc[index].dw0.size = dmachunklen; + dmadesc[index].dw0.length = dmachunklen; + dmadesc[index].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA; + dmadesc[index].buffer = (void *)data; + len -= dmachunklen; + data += dmachunklen; + index++; + } +} + +/** + * @brief Function that allocates and populates a given number of DMA descriptors to form a DMA descriptor linked list + * + * @param buffer Data (can be unaligned) buffer that is to be operated upon in an AES operation (ciphertext or plaintext) + * @param len Length of the above data buffer + * @param start_alignment_buffer The buffer which the first DMA descriptor points to for processing start_alignment length of bytes from the above buffer + * @param end_alignment_buffer The buffer which the last DMA descriptor points to for processing end_alignment length of bytes from the above buffer + * @param alignment_buffer_size Size of an alignment buffer + * @param cache_line_size Size of cache line that is needed to align the buffers and DMA descriptors before cache sync + * @param[out] start_alignment The least number of bytes from the start of the buffer that are unaligned to the Cache requirements (L1 Cache alignments) + * @param[out] end_alignment The number of bytes at the end of the buffer aligned up to AES_BLOCK_BYTES that are unaligned to the Cache requirements (L1 Cache alignments) + * @param[out] dma_descs Pointer to the list of DMA descriptors that are needed to be populated + * @param[out] dma_desc_num Number of DMA descriptors that are needed to be allocated + */ +static esp_err_t generate_descriptor_list(const uint8_t *buffer, const size_t len, uint8_t **start_alignment_buffer, uint8_t **end_alignment_buffer, size_t alignment_buffer_size, size_t cache_line_size, size_t *start_alignment, size_t *end_alignment, crypto_dma_desc_t **dma_descs, size_t *dma_desc_num, bool is_output) +{ + size_t unaligned_start_bytes = 0; + size_t aligned_block_bytes = 0; + size_t unaligned_end_bytes = 0; + size_t dma_descs_needed = 0; + + uint8_t *start_alignment_stream_buffer = NULL; + uint8_t *end_alignment_stream_buffer = NULL; + + crypto_dma_desc_t *dma_descriptors = NULL; + + if (len == 0) { + goto ret; + } + + /* Extra bytes that were needed to be processed for supplying the AES peripheral a padded multiple of 16 bytes input */ + size_t extra_bytes = ALIGN_UP(len, AES_BLOCK_BYTES) - len; + + size_t start_offset = ((intptr_t)buffer & (cache_line_size - 1)); + + if (start_offset) { + unaligned_start_bytes = cache_line_size - start_offset; + } else { + unaligned_start_bytes = 0; + } + + if (unaligned_start_bytes < len) { + aligned_block_bytes = ALIGN_DOWN((len - unaligned_start_bytes), cache_line_size); + unaligned_end_bytes = len - unaligned_start_bytes - aligned_block_bytes + extra_bytes; + } else { + unaligned_start_bytes = len + extra_bytes; + unaligned_end_bytes = 0; + aligned_block_bytes = 0; + } + + size_t max_desc_size = (is_output) ? DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED : DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED; + + dma_descs_needed = (unaligned_start_bytes ? 1 : 0) + dma_desc_get_required_num(aligned_block_bytes, max_desc_size) + (unaligned_end_bytes ? 1 : 0); + + /* Allocate memory for DMA descriptors of total size aligned up to a multiple of cache line size */ + dma_descriptors = (crypto_dma_desc_t *) aes_dma_calloc(dma_descs_needed, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA, NULL); + if (dma_descriptors == NULL) { + ESP_LOGE(TAG, "Failed to allocate memory for the array of DMA descriptors"); + return ESP_FAIL; + } + + size_t populated_dma_descs = 0; + + if (unaligned_start_bytes) { + start_alignment_stream_buffer = aes_dma_calloc(alignment_buffer_size, sizeof(uint8_t), AES_DMA_ALLOC_CAPS, NULL); + if (start_alignment_stream_buffer == NULL) { + ESP_LOGE(TAG, "Failed to allocate memory for start alignment buffer"); + return ESP_FAIL; + } + + memset(start_alignment_stream_buffer, 0, unaligned_start_bytes); + memcpy(start_alignment_stream_buffer, buffer, (unaligned_start_bytes > len) ? len : unaligned_start_bytes); + memset(start_alignment_stream_buffer + unaligned_start_bytes, 0, alignment_buffer_size - unaligned_start_bytes); + + // add start alignment node to the DMA linked list + dma_desc_populate(dma_descriptors, start_alignment_stream_buffer, unaligned_start_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, populated_dma_descs); + populated_dma_descs += (unaligned_start_bytes ? 1 : 0); + } + + if (aligned_block_bytes) { + // add "aligned_dma_desc_num" nodes to DMA linked list + dma_desc_populate(dma_descriptors, buffer + unaligned_start_bytes, aligned_block_bytes, max_desc_size, populated_dma_descs); + populated_dma_descs += dma_desc_get_required_num(aligned_block_bytes, max_desc_size); + } + + if (unaligned_end_bytes) { + end_alignment_stream_buffer = aes_dma_calloc(alignment_buffer_size, sizeof(uint8_t), AES_DMA_ALLOC_CAPS, NULL); + if (end_alignment_stream_buffer == NULL) { + ESP_LOGE(TAG, "Failed to allocate memory for end alignment buffer"); + return ESP_FAIL; + } + + memset(end_alignment_stream_buffer, 0, unaligned_end_bytes); + memcpy(end_alignment_stream_buffer, buffer + unaligned_start_bytes + aligned_block_bytes, unaligned_end_bytes - extra_bytes); + memset(end_alignment_stream_buffer + unaligned_end_bytes, 0, alignment_buffer_size - unaligned_end_bytes); + + // add end alignment node to the DMA linked list + dma_desc_populate(dma_descriptors, end_alignment_stream_buffer, unaligned_end_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, populated_dma_descs); + populated_dma_descs += (unaligned_end_bytes ? 1 : 0); + } + + if (dma_desc_link(dma_descriptors, dma_descs_needed, cache_line_size) != ESP_OK) { + ESP_LOGE(TAG, "DMA descriptors cache sync C2M failed"); + return ESP_FAIL; + } + +ret: + if (start_alignment != NULL) { + *start_alignment = unaligned_start_bytes; + } + + if (end_alignment != NULL) { + *end_alignment = unaligned_end_bytes; + } + + if (dma_desc_num != NULL) { + *dma_desc_num = dma_descs_needed; + } + + *dma_descs = dma_descriptors; + *start_alignment_buffer = start_alignment_stream_buffer; + *end_alignment_buffer = end_alignment_stream_buffer; + + return ESP_OK; +} + +int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out) +{ + unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block + unsigned block_bytes = len - stream_bytes; // bytes which are in a full block + unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); + + bool use_intr = false; + bool input_needs_realloc = false; + bool output_needs_realloc = false; + int ret = 0; + + assert(len > 0); // caller shouldn't ever have len set to zero + assert(stream_bytes == 0 || stream_out != NULL); // stream_out can be NULL if we're processing full block(s) + + /* If no key is written to hardware yet, either the user hasn't called + mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't + know which mode to use - or a fault skipped the + key write to hardware. Treat this as a fatal error and zero the output block. + */ + if (ctx->key_in_hardware != ctx->key_bytes) { + mbedtls_platform_zeroize(output, len); + return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH; + } + + /* DMA cannot access memory in the iCache range, copy input to internal ram */ + if (!s_check_dma_capable(input)) { + input_needs_realloc = true; + } + + if (!s_check_dma_capable(output)) { + output_needs_realloc = true; + } + + /* If either input or output is unaccessible to the DMA then they need to be reallocated */ + if (input_needs_realloc || output_needs_realloc) { + return esp_aes_process_dma_ext_ram(ctx, input, output, len, stream_out, input_needs_realloc, output_needs_realloc); + } + + size_t input_cache_line_size = get_cache_line_size(input); + size_t output_cache_line_size = get_cache_line_size(output); + + if (input_cache_line_size == 0 || output_cache_line_size == 0) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Getting cache line size failed"); + return -1; + } + + size_t input_alignment_buffer_size = MAX(2 * input_cache_line_size, AES_BLOCK_BYTES); + + crypto_dma_desc_t *input_desc = NULL; + uint8_t *input_start_stream_buffer = NULL; + uint8_t *input_end_stream_buffer = NULL; + + if (generate_descriptor_list(input, len, &input_start_stream_buffer, &input_end_stream_buffer, input_alignment_buffer_size, input_cache_line_size, NULL, NULL, &input_desc, NULL, false) != ESP_OK) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Generating input DMA descriptors failed"); + return -1; + } + + size_t output_alignment_buffer_size = MAX(2 * output_cache_line_size, AES_BLOCK_BYTES); + + crypto_dma_desc_t *output_desc = NULL; + uint8_t *output_start_stream_buffer = NULL; + uint8_t *output_end_stream_buffer = NULL; + size_t output_start_alignment = 0; + size_t output_end_alignment = 0; + size_t output_dma_desc_num = 0; + + if (generate_descriptor_list(output, len, &output_start_stream_buffer, &output_end_stream_buffer, output_alignment_buffer_size, output_cache_line_size, &output_start_alignment, &output_end_alignment, &output_desc, &output_dma_desc_num, true) != ESP_OK) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Generating output DMA descriptors failed"); + return -1; + } + + crypto_dma_desc_t *out_desc_tail = &output_desc[output_dma_desc_num - 1]; + +#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) + /* Only use interrupt for long AES operations */ + if (len > AES_DMA_INTR_TRIG_LEN) { + use_intr = true; + if (esp_aes_isr_initialise() != ESP_OK) { + ESP_LOGE(TAG, "ESP-AES ISR initialisation failed"); + ret = -1; + goto cleanup; + } + } else +#endif + { + aes_hal_interrupt_enable(false); + } + + if (esp_aes_dma_start(input_desc, output_desc) != ESP_OK) { + ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available"); + ret = -1; + goto cleanup; + } + + aes_hal_transform_dma_start(blocks); + + if (esp_aes_dma_wait_complete(use_intr, out_desc_tail) < 0) { + ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); + ret = -1; + goto cleanup; + } + +#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + if (esp_cache_msync(output_desc, ALIGN_UP(output_dma_desc_num * sizeof(crypto_dma_desc_t), output_cache_line_size), ESP_CACHE_MSYNC_FLAG_DIR_M2C) != ESP_OK) { + ESP_LOGE(TAG, "Output DMA descriptor cache sync M2C failed"); + ret = -1; + goto cleanup; + } + for (int i = 0; i < output_dma_desc_num; i++) { + if (esp_cache_msync(output_desc[i].buffer, output_desc[i].dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED) != ESP_OK) { + ESP_LOGE(TAG, "Output DMA descriptor buffers cache sync M2C failed"); + ret = -1; + goto cleanup; + } + } +#endif + + aes_hal_transform_dma_finish(); + + /* Extra bytes that were needed to be processed for supplying the AES peripheral a padded multiple of 16 bytes input */ + size_t extra_bytes = ALIGN_UP(len, AES_BLOCK_BYTES) - len; + + if (output_start_alignment) { + memcpy(output, output_start_stream_buffer, (output_start_alignment > len) ? len : output_start_alignment); + } + + if (output_end_alignment) { + memcpy(output + len - (output_end_alignment - extra_bytes), output_end_stream_buffer, output_end_alignment - extra_bytes); + } + + if (stream_bytes > 0) { + if (output_end_alignment) { + if (output_end_alignment >= AES_BLOCK_BYTES) { + memcpy(stream_out, output_end_stream_buffer + output_end_alignment - AES_BLOCK_BYTES, AES_BLOCK_BYTES); + } else { + size_t to_copy_from_output = AES_BLOCK_BYTES - output_end_alignment; + memcpy(stream_out, output + len - to_copy_from_output, to_copy_from_output); + memcpy(stream_out + to_copy_from_output, output_end_stream_buffer, output_end_alignment); + } + } + else if (output_start_alignment >= len) { + memcpy(stream_out, output_start_stream_buffer + output_start_alignment - AES_BLOCK_BYTES, AES_BLOCK_BYTES); + } + } + +cleanup: + if (ret != 0) { + mbedtls_platform_zeroize(output, len); + } + + free(input_start_stream_buffer); + free(input_end_stream_buffer); + + free(output_start_stream_buffer); + free(output_end_stream_buffer); + + free(input_desc); + free(output_desc); + + return ret; +} + +#else /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ + +/* These are static due to: + * * Must be in DMA capable memory, so stack is not a safe place to put them + * * To avoid having to malloc/free them for every DMA operation + */ +static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc; +static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc; +static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; +static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; + +/** Append a descriptor to the chain, set head if chain empty + * + * @param[out] head Pointer to the first/head node of the DMA descriptor linked list + * @param item Pointer to the DMA descriptor node that has to be appended + */ +static inline void dma_desc_append(crypto_dma_desc_t **head, crypto_dma_desc_t *item) +{ + crypto_dma_desc_t *it; + if (*head == NULL) { + *head = item; + return; + } + + it = *head; + + while (it->next != 0) { + it = (crypto_dma_desc_t *)it->next; + } + it->dw0.suc_eof = 0; + it->next = item; +} + +/** + * Generate a linked list pointing to a (huge) buffer in an descriptor array. + * + * The caller should ensure there is enough size to hold the array, by calling + * `dma_desc_get_required_num` with the same or less than the max_desc_size argument. + * + * @param[out] dmadesc Output of a descriptor array, the head should be fed to the DMA. + * @param data Buffer for the descriptors to point to. + * @param len Size (or length for TX) of the buffer + * @param max_desc_size Maximum length of each descriptor + * @param isrx The RX DMA may require the buffer to be word-aligned, set to true for a RX link, otherwise false. + */ +static inline void dma_desc_setup_link(crypto_dma_desc_t* dmadesc, const uint8_t *data, int len, int max_desc_size, bool isrx) +{ + int i = 0; + while (len) { + int dmachunklen = len; + if (dmachunklen > max_desc_size) { + dmachunklen = max_desc_size; + } + if (isrx) { + //Receive needs DMA length rounded to next 32-bit boundary + dmadesc[i].dw0.size = (dmachunklen + 3) & (~3); + dmadesc[i].dw0.length = (dmachunklen + 3) & (~3); + } else { + dmadesc[i].dw0.size = dmachunklen; + dmadesc[i].dw0.length = dmachunklen; + } + dmadesc[i].buffer = (void *)data; + dmadesc[i].dw0.suc_eof = 0; + dmadesc[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA; + dmadesc[i].next = &dmadesc[i + 1]; + len -= dmachunklen; + data += dmachunklen; + i++; + } + dmadesc[i - 1].dw0.suc_eof = 1; //Mark last DMA desc as end of stream. + dmadesc[i - 1].next = NULL; +} + +/* Encrypt/decrypt the input using DMA + * The function esp_aes_process_dma zeroises the output buffer in the case of following conditions: + * 1. If key is not written in the hardware + * 2. Memory allocation failures + * 3. If AES interrupt is enabled and ISR initialisation fails + * 4. Failure in any of the AES operations + */ +int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out) +{ + crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL; + crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ + crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; + size_t crypto_dma_desc_num = 0; + unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block + unsigned block_bytes = len - stream_bytes; // bytes which are in a full block + unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); + bool use_intr = false; + bool input_needs_realloc = false; + bool output_needs_realloc = false; + int ret = 0; + + assert(len > 0); // caller shouldn't ever have len set to zero + assert(stream_bytes == 0 || stream_out != NULL); // stream_out can be NULL if we're processing full block(s) + + /* If no key is written to hardware yet, either the user hasn't called + mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't + know which mode to use - or a fault skipped the + key write to hardware. Treat this as a fatal error and zero the output block. + */ + if (ctx->key_in_hardware != ctx->key_bytes) { + mbedtls_platform_zeroize(output, len); + return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH; + } + + if (block_bytes > 0) { + /* Flush cache if input in external ram */ +#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE) + if (esp_ptr_external_ram(input)) { + if (esp_cache_msync((void *)input, len, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED) != ESP_OK) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Cache sync failed for the input in external RAM"); + return -1; + } + } + if (esp_ptr_external_ram(output)) { + size_t dcache_line_size; + ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_PSRAM, &dcache_line_size); + if (ret != ESP_OK) { + return ret; + } + if ((((intptr_t)(output) & (dcache_line_size - 1)) != 0) || (block_bytes % dcache_line_size != 0)) { + // Non aligned ext-mem buffer + output_needs_realloc = true; + } + } +#endif + /* DMA cannot access memory in the iCache range, copy input to internal ram */ + if (!s_check_dma_capable(input)) { + input_needs_realloc = true; + } + + if (!s_check_dma_capable(output)) { + output_needs_realloc = true; + } + + /* If either input or output is unaccessible to the DMA then they need to be reallocated */ + if (input_needs_realloc || output_needs_realloc) { + return esp_aes_process_dma_ext_ram(ctx, input, output, len, stream_out, input_needs_realloc, output_needs_realloc); + } + + /* Set up dma descriptors for input and output considering the 16 byte alignment requirement for EDMA */ + crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED); + + /* Allocate both in and out descriptors to save a malloc/free per function call */ + block_desc = heap_caps_aligned_calloc(8, crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); + if (block_desc == NULL) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Failed to allocate memory"); + return -1; + } + + block_in_desc = block_desc; + block_out_desc = block_desc + crypto_dma_desc_num; + + // the size field has 12 bits, but 0 not for 4096. + // to avoid possible problem when the size is not word-aligned, we only use 4096-4 per desc. + // Maximum size of data in the buffer that a DMA descriptor can hold. + dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + + //Limit max inlink descriptor length to be 16 byte aligned, require for EDMA + dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED, 0); + + /* Setup in/out start descriptors */ + dma_desc_append(&in_desc_head, block_in_desc); + dma_desc_append(&out_desc_head, block_out_desc); + + out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; + } + + /* Any leftover bytes which are appended as an additional DMA list */ + if (stream_bytes > 0) { + + memset(&s_stream_in_desc, 0, sizeof(crypto_dma_desc_t)); + memset(&s_stream_out_desc, 0, sizeof(crypto_dma_desc_t)); + + memset(s_stream_in, 0, AES_BLOCK_BYTES); + memset(s_stream_out, 0, AES_BLOCK_BYTES); + + memcpy(s_stream_in, input + block_bytes, stream_bytes); + + dma_desc_setup_link(&s_stream_in_desc, s_stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + dma_desc_setup_link(&s_stream_out_desc, s_stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + + /* Link with block descriptors */ + dma_desc_append(&in_desc_head, &s_stream_in_desc); + dma_desc_append(&out_desc_head, &s_stream_out_desc); + + out_desc_tail = &s_stream_out_desc; + } + +#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) + /* Only use interrupt for long AES operations */ + if (len > AES_DMA_INTR_TRIG_LEN) { + use_intr = true; + if (esp_aes_isr_initialise() != ESP_OK) { + ESP_LOGE(TAG, "ESP-AES ISR initialisation failed"); + ret = -1; + goto cleanup; + } + } else +#endif + { + aes_hal_interrupt_enable(false); + } + + if (esp_aes_dma_start(in_desc_head, out_desc_head) != ESP_OK) { + ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available"); + ret = -1; + goto cleanup; + } + + aes_hal_transform_dma_start(blocks); + + if (esp_aes_dma_wait_complete(use_intr, out_desc_tail) < 0) { + ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); + ret = -1; + goto cleanup; + } + +#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE) + if (block_bytes > 0) { + if (esp_ptr_external_ram(output)) { + if(esp_cache_msync((void*)output, block_bytes, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED) != ESP_OK) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Cache sync failed for the output in external RAM"); + return -1; + } + } + } +#endif + aes_hal_transform_dma_finish(); + + if (stream_bytes > 0) { + memcpy(output + block_bytes, s_stream_out, stream_bytes); + memcpy(stream_out, s_stream_out, AES_BLOCK_BYTES); + } + +cleanup: + if (ret != 0) { + mbedtls_platform_zeroize(output, len); + } + free(block_desc); + return ret; +} +#endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ + +#if CONFIG_MBEDTLS_HARDWARE_GCM + +/* Encrypt/decrypt with AES-GCM the input using DMA + * The function esp_aes_process_dma_gcm zeroises the output buffer in the case of following conditions: + * 1. If key is not written in the hardware + * 2. Memory allocation failures + * 3. If AES interrupt is enabled and ISR initialisation fails + * 4. Failure in any of the AES operations + */ +int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, crypto_dma_desc_t *aad_desc, size_t aad_len) +{ + crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL, *len_desc = NULL; + crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ + crypto_dma_desc_t stream_in_desc, stream_out_desc; + crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; + size_t crypto_dma_desc_num = 0; + uint32_t len_buf[4] = {}; + uint8_t stream_in[16] = {}; + uint8_t stream_out[16] = {}; + unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block + unsigned block_bytes = len - stream_bytes; // bytes which are in a full block + + unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); + + bool use_intr = false; + int ret = 0; + + /* If no key is written to hardware yet, either the user hasn't called + mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't + know which mode to use - or a fault skipped the + key write to hardware. Treat this as a fatal error and zero the output block. + */ + if (ctx->key_in_hardware != ctx->key_bytes) { + mbedtls_platform_zeroize(output, len); + return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH; + } + + /* Set up dma descriptors for input and output */ + crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED); + + /* Allocate both in and out descriptors to save a malloc/free per function call, add 1 for length descriptor */ + block_desc = heap_caps_calloc((crypto_dma_desc_num * 2) + 1, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); + if (block_desc == NULL) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Failed to allocate memory"); + return -1; + } + + block_in_desc = block_desc; + len_desc = block_desc + crypto_dma_desc_num; + block_out_desc = block_desc + crypto_dma_desc_num + 1; + + if (aad_desc != NULL) { + dma_desc_append(&in_desc_head, aad_desc); + } + + if (block_bytes > 0) { + dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + + dma_desc_append(&in_desc_head, block_in_desc); + dma_desc_append(&out_desc_head, block_out_desc); + + out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; + } + + /* Any leftover bytes which are appended as an additional DMA list */ + if (stream_bytes > 0) { + memcpy(stream_in, input + block_bytes, stream_bytes); + + dma_desc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + dma_desc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + + dma_desc_append(&in_desc_head, &stream_in_desc); + dma_desc_append(&out_desc_head, &stream_out_desc); + + out_desc_tail = &stream_out_desc; + } + + + len_buf[1] = __builtin_bswap32(aad_len * 8); + len_buf[3] = __builtin_bswap32(len * 8); + + len_desc->dw0.length = sizeof(len_buf); + len_desc->dw0.size = sizeof(len_buf); + len_desc->dw0.owner = 1; + len_desc->dw0.suc_eof = 1; + len_desc->buffer = (uint8_t *)len_buf; + + dma_desc_append(&in_desc_head, len_desc); + +#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) + /* Only use interrupt for long AES operations */ + if (len > AES_DMA_INTR_TRIG_LEN) { + use_intr = true; + if (esp_aes_isr_initialise() != ESP_OK) { + ESP_LOGE(TAG, "ESP-AES ISR initialisation failed"); + ret = -1; + goto cleanup; + } + } else +#endif + { + aes_hal_interrupt_enable(false); + } + + /* Start AES operation */ + if (esp_aes_dma_start(in_desc_head, out_desc_head) != ESP_OK) { + ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available"); + ret = -1; + goto cleanup; + } + + aes_hal_transform_dma_gcm_start(blocks); + + if (esp_aes_dma_wait_complete(use_intr, out_desc_tail) < 0) { + ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); + ret = -1; + goto cleanup; + } + + aes_hal_transform_dma_finish(); + + if (stream_bytes > 0) { + memcpy(output + block_bytes, stream_out, stream_bytes); + } + +cleanup: + if (ret != 0) { + mbedtls_platform_zeroize(output, len); + } + free(block_desc); + return ret; +} + +#endif //CONFIG_MBEDTLS_HARDWARE_GCM diff --git a/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c b/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c index c43bfd68ae..c989927e2d 100644 --- a/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c +++ b/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -15,5 +15,9 @@ esp_err_t esp_aes_dma_start(const crypto_dma_desc_t *input, const crypto_dma_des bool esp_aes_dma_done(const crypto_dma_desc_t *output) { +#if SOC_AXI_GDMA_SUPPORTED + return esp_crypto_shared_gdma_done(); +#else return (output->dw0.owner == 0); +#endif } diff --git a/components/mbedtls/port/aes/esp_aes_gcm.c b/components/mbedtls/port/aes/esp_aes_gcm.c index e7c34fc5b4..c33c1fdaa2 100644 --- a/components/mbedtls/port/aes/esp_aes_gcm.c +++ b/components/mbedtls/port/aes/esp_aes_gcm.c @@ -684,10 +684,13 @@ int esp_aes_gcm_crypt_and_tag( esp_gcm_context *ctx, return esp_aes_gcm_crypt_and_tag_partial_hw(ctx, mode, length, iv, iv_len, aad, aad_len, input, output, tag_len, tag); } - /* Limit aad len to a single DMA descriptor to simplify DMA handling - In practice, e.g. with mbedtls the length of aad will always be short + /* Limit aad len to a single DMA descriptor to simplify DMA handling + In practice, e.g. with mbedtls the length of aad will always be short + the size field has 12 bits, but 0 not for 4096. + to avoid possible problem when the size is not word-aligned, we only use 4096-4 per desc. + Maximum size of data in the buffer that a DMA descriptor can hold. */ - if (aad_len > DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC) { + if (aad_len > DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED) { return MBEDTLS_ERR_GCM_BAD_INPUT; } /* IV and AD are limited to 2^32 bits, so 2^29 bytes */ diff --git a/components/mbedtls/port/aes/include/esp_aes_internal.h b/components/mbedtls/port/aes/include/esp_aes_internal.h index e400e63f8a..1c065927df 100644 --- a/components/mbedtls/port/aes/include/esp_aes_internal.h +++ b/components/mbedtls/port/aes/include/esp_aes_internal.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -17,6 +17,20 @@ extern "C" { bool valid_key_length(const esp_aes_context *ctx); +#if SOC_AES_SUPPORT_DMA +/** + * @brief Run a AES operation using DMA + * + * @param ctx Aes context + * @param input Pointer to input data + * @param output Pointer to output data + * @param len Length of the input data + * @param stream_out The saved stream-block for resuming next operation. + * @return int -1 on error + */ +int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out); +#endif + #if SOC_AES_SUPPORT_GCM /** * @brief Run a AES-GCM conversion using DMA diff --git a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c index 972c816800..059ebb738a 100644 --- a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c +++ b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c @@ -143,33 +143,14 @@ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *ou esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output, gdma_trigger_peripheral_t peripheral) { int rx_ch_id = 0; - esp_err_t ret = ESP_OK; - -#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE - // TODO: replace with `esp_cache_msync` - const crypto_dma_desc_t *it = input; - while(it != NULL) { - esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - it = (const crypto_dma_desc_t*) it->next; - } - - it = output; - while(it != NULL) { - esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - it = (const crypto_dma_desc_t*) it->next; - }; -#endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ if (tx_channel == NULL) { /* Allocate a pair of RX and TX for crypto, should only happen the first time we use the GMDA or if user called esp_crypto_shared_gdma_release */ - ret = crypto_shared_gdma_init(); - } - - if (ret != ESP_OK) { - return ret; + esp_err_t ret = crypto_shared_gdma_init(); + if (ret != ESP_OK) { + return ret; + } } /* Tx channel is shared between AES and SHA, need to connect to peripheral every time */ @@ -198,6 +179,20 @@ esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, c return ESP_OK; } +#if SOC_AXI_GDMA_SUPPORTED +bool esp_crypto_shared_gdma_done(void) +{ + int rx_ch_id = 0; + gdma_get_channel_id(rx_channel, &rx_ch_id); + while(1) { + if ((axi_dma_ll_rx_get_interrupt_status(&AXI_DMA, rx_ch_id, true) & 1)) { + break; + } + } + return true; +} +#endif /* SOC_AXI_GDMA_SUPPORTED */ + void esp_crypto_shared_gdma_free() { esp_crypto_sha_aes_lock_acquire(); diff --git a/components/mbedtls/port/include/esp_crypto_shared_gdma.h b/components/mbedtls/port/include/esp_crypto_shared_gdma.h index fd49558912..d6e40fd0fb 100644 --- a/components/mbedtls/port/include/esp_crypto_shared_gdma.h +++ b/components/mbedtls/port/include/esp_crypto_shared_gdma.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2021-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2021-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -43,6 +43,15 @@ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *ou */ esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output, gdma_trigger_peripheral_t peripheral); +#if SOC_AXI_GDMA_SUPPORTED +/** + * @brief Busy wait until GDMA RX data transfer is complete + * + * @return true, when GDMA RX data transfer is complete + */ +bool esp_crypto_shared_gdma_done(void); +#endif /* SOC_AXI_GDMA_SUPPORTED */ + /** * @brief Frees any shared crypto DMA channel, if esp_crypto_shared_gdma_start is called after * this, new GDMA channels will be allocated.