diff --git a/components/esp_hw_support/dma/esp_dma_utils.c b/components/esp_hw_support/dma/esp_dma_utils.c
index 518cb0c5f9..d93ee97ac7 100644
--- a/components/esp_hw_support/dma/esp_dma_utils.c
+++ b/components/esp_hw_support/dma/esp_dma_utils.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2023-2025 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -24,68 +24,113 @@
 static const char *TAG = "dma_utils";
 
 #define ALIGN_UP_BY(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
-#define ALIGN_DOWN_BY(num, align) ((num) & (~((align) - 1)))
 
-esp_err_t esp_dma_split_buffer_to_aligned(void *input_buffer, size_t input_buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array)
+esp_err_t esp_dma_split_rx_buffer_to_cache_aligned(void *rx_buffer, size_t buffer_len, dma_buffer_split_array_t *align_buf_array, uint8_t** ret_stash_buffer)
 {
-    esp_err_t ret = ESP_OK;
-    ESP_RETURN_ON_FALSE(align_array && input_buffer && input_buffer_len && stash_buffer && split_alignment && !(split_alignment & (split_alignment - 1)
-                        && (stash_buffer_len >= 2 * split_alignment)), ESP_ERR_INVALID_ARG, TAG, "invalid argument");
-    ESP_RETURN_ON_FALSE(!((uintptr_t)stash_buffer % split_alignment), ESP_ERR_INVALID_ARG, TAG, "extra buffer is not aligned");
+    // ret_stash_buffer is written unconditionally at the end of this function, so it must be validated too
+    ESP_RETURN_ON_FALSE(rx_buffer && buffer_len && align_buf_array && ret_stash_buffer, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
 
-    // calculate head_overflow_len
-    size_t head_overflow_len = (uintptr_t)input_buffer % split_alignment;
-    head_overflow_len = head_overflow_len ? split_alignment - head_overflow_len : 0;
-    ESP_LOGD(TAG, "head_addr:%p split_alignment:%zu head_overflow_len:%zu", input_buffer, split_alignment, head_overflow_len);
-    // calculate tail_overflow_len
-    size_t tail_overflow_len = ((uintptr_t)input_buffer + input_buffer_len) % split_alignment;
-    ESP_LOGD(TAG, "tail_addr:%p split_alignment:%zu tail_overflow_len:%zu", input_buffer + input_buffer_len - tail_overflow_len, split_alignment, tail_overflow_len);
+    // read the cache line size of internal and external memory, we also use this information to check if a given memory is behind the cache
+    size_t int_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
+    size_t ext_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_EXT_MEM, CACHE_TYPE_DATA);
 
-    uint32_t extra_buf_count = 0;
-    input_buffer = (uint8_t*)input_buffer;
-    stash_buffer = (uint8_t*)stash_buffer;
-    align_array->buf.head.recovery_address = input_buffer;
-    align_array->buf.head.aligned_buffer = stash_buffer + split_alignment * extra_buf_count++;
-    align_array->buf.head.length = head_overflow_len;
-    align_array->buf.body.recovery_address = input_buffer + head_overflow_len;
-    align_array->buf.body.aligned_buffer = input_buffer + head_overflow_len;
-    align_array->buf.body.length = input_buffer_len - head_overflow_len - tail_overflow_len;
-    align_array->buf.tail.recovery_address = input_buffer + input_buffer_len - tail_overflow_len;
-    align_array->buf.tail.aligned_buffer = stash_buffer + split_alignment * extra_buf_count++;
-    align_array->buf.tail.length = tail_overflow_len;
+    size_t split_line_size = 0;
+    if (esp_ptr_external_ram(rx_buffer)) {
+        split_line_size = ext_mem_cache_line_size;
+    } else if (esp_ptr_internal(rx_buffer)) {
+        split_line_size = int_mem_cache_line_size;
+    }
+    ESP_LOGV(TAG, "split_line_size:%zu", split_line_size);
 
-    // special handling when input_buffer length is no more than buffer alignment
-    if(head_overflow_len >= input_buffer_len || tail_overflow_len >= input_buffer_len)
-    {
-        align_array->buf.head.length = input_buffer_len ;
-        align_array->buf.body.length = 0 ;
-        align_array->buf.tail.length = 0 ;
+    // allocate the stash buffer from internal RAM
+    // Note, the split_line_size can be 0, in this case, the stash_buffer is also NULL, which is fine
+    uint8_t* stash_buffer = heap_caps_calloc(2, split_line_size, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    ESP_RETURN_ON_FALSE(!(split_line_size && !stash_buffer), ESP_ERR_NO_MEM, TAG, "no mem for stash buffer");
+
+    // clear align_array to avoid garbage data
+    memset(align_buf_array, 0, sizeof(dma_buffer_split_array_t));
+    bool need_cache_sync[3] = {false};
+
+    // if split_line_size is non-zero, split the buffer into head, body and tail
+    if (split_line_size > 0) {
+        // calculate head_overflow_len
+        size_t head_overflow_len = (uintptr_t)rx_buffer % split_line_size;
+        head_overflow_len = head_overflow_len ? split_line_size - head_overflow_len : 0;
+        ESP_LOGV(TAG, "head_addr:%p head_overflow_len:%zu", rx_buffer, head_overflow_len);
+        // calculate tail_overflow_len
+        size_t tail_overflow_len = ((uintptr_t)rx_buffer + buffer_len) % split_line_size;
+        ESP_LOGV(TAG, "tail_addr:%p tail_overflow_len:%zu", rx_buffer + buffer_len - tail_overflow_len, tail_overflow_len);
+
+        uint8_t extra_buf_count = 0;
+        uint8_t* input_buffer = (uint8_t*)rx_buffer;
+        align_buf_array->buf.head.recovery_address = input_buffer;
+        align_buf_array->buf.head.aligned_buffer = stash_buffer + split_line_size * extra_buf_count++;
+        align_buf_array->buf.head.length = head_overflow_len;
+        need_cache_sync[0] = int_mem_cache_line_size > 0;
+        align_buf_array->buf.body.recovery_address = input_buffer + head_overflow_len;
+        align_buf_array->buf.body.aligned_buffer = input_buffer + head_overflow_len;
+        align_buf_array->buf.body.length = buffer_len - head_overflow_len - tail_overflow_len;
+        need_cache_sync[1] = true;
+        align_buf_array->buf.tail.recovery_address = input_buffer + buffer_len - tail_overflow_len;
+        align_buf_array->buf.tail.aligned_buffer = stash_buffer + split_line_size * extra_buf_count++;
+        align_buf_array->buf.tail.length = tail_overflow_len;
+        need_cache_sync[2] = int_mem_cache_line_size > 0;
+
+        // special handling when input_buffer length is no more than buffer alignment
+        if (head_overflow_len >= buffer_len || tail_overflow_len >= buffer_len) {
+            align_buf_array->buf.head.length = buffer_len ;
+            align_buf_array->buf.body.length = 0 ;
+            align_buf_array->buf.tail.length = 0 ;
+        }
+    } else {
+        align_buf_array->buf.body.aligned_buffer = rx_buffer;
+        align_buf_array->buf.body.recovery_address = rx_buffer;
+        align_buf_array->buf.body.length = buffer_len;
+        need_cache_sync[1] = false;
     }
 
-    for(int i = 0; i < 3; i++) {
-        if(!align_array->aligned_buffer[i].length) {
-            align_array->aligned_buffer[i].aligned_buffer = NULL;
-            align_array->aligned_buffer[i].recovery_address = NULL;
+    for (int i = 0; i < 3; i++) {
+        if (align_buf_array->aligned_buffer[i].length == 0) {
+            align_buf_array->aligned_buffer[i].aligned_buffer = NULL;
+            align_buf_array->aligned_buffer[i].recovery_address = NULL;
+            need_cache_sync[i] = false;
         }
     }
 
-    return ret;
+    // invalidate the aligned buffer if necessary
+    for (int i = 0; i < 3; i++) {
+        if (need_cache_sync[i]) {
+            size_t sync_size = align_buf_array->aligned_buffer[i].length;
+            if (sync_size < split_line_size) {
+                // If the size is smaller than the cache line, we need to sync the split buffer (must be cache line sized)
+                sync_size = split_line_size;
+            }
+            esp_err_t sync_ret = esp_cache_msync(align_buf_array->aligned_buffer[i].aligned_buffer, sync_size, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
+            if (sync_ret != ESP_OK) {
+                // a failed invalidate would leave stale cache lines over the DMA target, so report it and don't leak the stash
+                ESP_LOGE(TAG, "cache sync failed");
+                heap_caps_free(stash_buffer);
+                return sync_ret;
+            }
+        }
+    }
+
+    *ret_stash_buffer = stash_buffer;
+    return ESP_OK;
 }
 
-esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array)
+esp_err_t esp_dma_merge_aligned_rx_buffers(dma_buffer_split_array_t *align_array)
 {
-    esp_err_t ret = ESP_OK;
-    ESP_RETURN_ON_FALSE(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
+    ESP_RETURN_ON_FALSE_ISR(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
 
     // only need to copy the head and tail buffer
-    if(align_array->buf.head.length) {
+    if (align_array->buf.head.length) {
         memcpy(align_array->buf.head.recovery_address, align_array->buf.head.aligned_buffer, align_array->buf.head.length);
     }
-    if(align_array->buf.tail.length) {
+    if (align_array->buf.tail.length) {
         memcpy(align_array->buf.tail.recovery_address, align_array->buf.tail.aligned_buffer, align_array->buf.tail.length);
     }
-
-    return ret;
+    return ESP_OK;
 }
 
 esp_err_t esp_dma_capable_malloc(size_t size, const esp_dma_mem_info_t *dma_mem_info, void **out_ptr, size_t *actual_size)
diff --git a/components/esp_hw_support/dma/gdma_link.c b/components/esp_hw_support/dma/gdma_link.c
index 5381b150e1..6b141e9cda 100644
--- a/components/esp_hw_support/dma/gdma_link.c
+++ b/components/esp_hw_support/dma/gdma_link.c
@@ -6,14 +6,8 @@
 
 #include
 #include
-#include
 #include
-#include
-#include "sdkconfig.h"
-#include "freertos/FreeRTOS.h"
-#include "freertos/task.h"
 #include "soc/soc_caps.h"
-#include "soc/ext_mem_defs.h"
 #include "esp_log.h"
 #include "esp_check.h"
 #include "esp_memory_utils.h"
diff --git a/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h b/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h
index b9ed67e93e..ac89c287f4 100644
--- a/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h
+++ b/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2023-2025 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -24,6 +24,8 @@ typedef struct {
 
 /**
  * @brief DMA buffer aligned array
+ *        The array contains three parts: head, body and tail.
+ *        The length of each part is >= 0; a length of 0 means that the part is absent.
  */
 typedef struct {
     union {
@@ -37,22 +39,23 @@ typedef struct {
 } dma_buffer_split_array_t;
 
 /**
- * @brief Split unaligned DMA buffer to aligned DMA buffer or aligned DMA buffer array
+ * @brief Split DMA RX buffer to cache aligned buffers
  *
- * @note Returned align array contains three parts: head, body and tail. Length of each buffer will be >=0, length 0 means that there is no such part
+ * @note After the original RX buffer is split into an array, caller should mount the buffer array to the DMA controller in scatter-gather mode.
+ *       Don't read/write the aligned buffers before the DMA finished using them.
  *
- * @param[in] buffer Origin DMA buffer address
- * @param[in] buffer_len Origin DMA buffer length
- * @param[in] stash_buffer Needed extra buffer to stash aligned buffer, should be allocated with DMA capable memory and aligned to split_alignment
- * @param[in] stash_buffer_len stash_buffer length
- * @param[in] split_alignment Alignment of each buffer required by the DMA
- * @param[out] align_array Aligned DMA buffer array
+ * @param[in] rx_buffer The origin DMA buffer used for receiving data
+ * @param[in] buffer_len rx_buffer length
+ * @param[out] align_buf_array Aligned DMA buffer array
+ * @param[out] ret_stash_buffer Allocated stash buffer (caller should free it after use). Must not be a NULL pointer; the returned stash is NULL when the cache line size is 0 and no split is needed
  * @return
  *      - ESP_OK: Split to aligned buffer successfully
  *      - ESP_ERR_INVALID_ARG: Split to aligned buffer failed because of invalid argument
+ *      - ESP_ERR_NO_MEM: Split to aligned buffer failed because the stash buffer can't be allocated
+ *      - Others: Split to aligned buffer failed because the cache invalidation failed
  *
  * brief sketch:
- *      buffer alignment delimiter      buffer alignment delimiter
+ *      cache alignment delimiter       cache alignment delimiter
  *                 │                               │
  *  Origin Buffer  │         Origin Buffer         │
  *        │        │               │               │
@@ -68,17 +71,19 @@ typedef struct {
  *        ▼                               ▼
  *  |xxxxx......|                   |xxxxx......|
  */
-esp_err_t esp_dma_split_buffer_to_aligned(void *buffer, size_t buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array);
+esp_err_t esp_dma_split_rx_buffer_to_cache_aligned(void *rx_buffer, size_t buffer_len, dma_buffer_split_array_t *align_buf_array, uint8_t** ret_stash_buffer);
 
 /**
- * @brief Merge aligned buffer array to origin buffer
+ * @brief Merge aligned RX buffer array to origin buffer
  *
- * @param[in] align_array Aligned DMA buffer array
+ * @note This function can be used in the ISR context.
+ *
+ * @param[in] align_buf_array Aligned DMA buffer array
  * @return
  *      - ESP_OK: Merge aligned buffer to origin buffer successfully
  *      - ESP_ERR_INVALID_ARG: Merge aligned buffer to origin buffer failed because of invalid argument
  */
-esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array);
+esp_err_t esp_dma_merge_aligned_rx_buffers(dma_buffer_split_array_t *align_buf_array);
 
 #ifdef __cplusplus
 }
diff --git a/components/esp_hw_support/test_apps/dma/main/test_gdma.c b/components/esp_hw_support/test_apps/dma/main/test_gdma.c
index abaf59a7ed..780c6700ed 100644
--- a/components/esp_hw_support/test_apps/dma/main/test_gdma.c
+++ b/components/esp_hw_support/test_apps/dma/main/test_gdma.c
@@ -361,25 +361,18 @@ TEST_CASE("GDMA M2M Mode", "[GDMA][M2M]")
 typedef struct {
     SemaphoreHandle_t done_sem;
     dma_buffer_split_array_t *align_array;
-    size_t split_alignment;
-    bool need_invalidate;
 } test_gdma_context_t;
 
-static bool test_gdma_m2m_unalgined_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+static bool test_gdma_m2m_unaligned_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
 {
     BaseType_t task_woken = pdFALSE;
     test_gdma_context_t *user_ctx = (test_gdma_context_t*)user_data;
-    for (int i = 0; i < 3; i++) {
-        if(user_ctx->align_array->aligned_buffer[i].aligned_buffer && user_ctx->need_invalidate) {
-            TEST_ESP_OK(esp_cache_msync(user_ctx->align_array->aligned_buffer[i].aligned_buffer, ALIGN_UP(user_ctx->align_array->aligned_buffer[i].length, user_ctx->split_alignment), ESP_CACHE_MSYNC_FLAG_DIR_M2C));
-        }
-    }
-    TEST_ESP_OK(esp_dma_merge_aligned_buffers(user_ctx->align_array));
+    TEST_ESP_OK(esp_dma_merge_aligned_rx_buffers(user_ctx->align_array));
     xSemaphoreGiveFromISR(user_ctx->done_sem, &task_woken);
     return task_woken == pdTRUE;
 }
 
-static void test_gdma_m2m_unalgined_buffer_test(uint8_t *dst_data, uint8_t *src_data, size_t data_length, size_t offset_len, size_t split_alignment)
+static void test_gdma_m2m_unaligned_buffer_test(uint8_t *dst_data, uint8_t *src_data, size_t data_length, size_t offset_len)
 {
     TEST_ASSERT_NOT_NULL(src_data);
     TEST_ASSERT_NOT_NULL(dst_data);
@@ -424,13 +417,10 @@ static void test_gdma_m2m_unalgined_buffer_test(uint8_t *dst_data, uint8_t *src_
     };
     TEST_ESP_OK(gdma_link_mount_buffers(tx_link_list, 0, tx_buf_mount_config, sizeof(tx_buf_mount_config) / sizeof(gdma_buffer_mount_config_t), NULL));
 
-    // allocate stash_buffer, should be freed by the user
-    void *stash_buffer = heap_caps_aligned_calloc(split_alignment, 2, split_alignment, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
-    size_t stash_buffer_len = 2 * split_alignment;
     dma_buffer_split_array_t align_array = {0};
     gdma_buffer_mount_config_t rx_aligned_buf_mount_config[3] = {0};
-
-    TEST_ESP_OK(esp_dma_split_buffer_to_aligned(dst_data + offset_len, data_length, stash_buffer, stash_buffer_len, split_alignment, &align_array));
+    uint8_t* stash_buffer = NULL;
+    TEST_ESP_OK(esp_dma_split_rx_buffer_to_cache_aligned(dst_data + offset_len, data_length, &align_array, &stash_buffer));
     for (int i = 0; i < 3; i++) {
         rx_aligned_buf_mount_config[i].buffer = align_array.aligned_buffer[i].aligned_buffer;
         rx_aligned_buf_mount_config[i].length = align_array.aligned_buffer[i].length;
@@ -438,15 +428,13 @@ static void test_gdma_m2m_unalgined_buffer_test(uint8_t *dst_data, uint8_t *src_
     TEST_ESP_OK(gdma_link_mount_buffers(rx_link_list, 0, rx_aligned_buf_mount_config, 3, NULL));
 
     gdma_rx_event_callbacks_t rx_cbs = {
-        .on_recv_eof = test_gdma_m2m_unalgined_rx_eof_callback,
+        .on_recv_eof = test_gdma_m2m_unaligned_rx_eof_callback,
     };
     SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
     TEST_ASSERT_NOT_NULL(done_sem);
     test_gdma_context_t user_ctx = {
         .done_sem = done_sem,
         .align_array = &align_array,
-        .split_alignment = split_alignment,
-        .need_invalidate = sram_alignment ? true : false,
     };
     TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, &user_ctx));
 
@@ -460,12 +448,12 @@ static void test_gdma_m2m_unalgined_buffer_test(uint8_t *dst_data, uint8_t *src_
         TEST_ASSERT_EQUAL(i % 256 , dst_data[i + offset_len]);
     }
 
-    free(stash_buffer);
     TEST_ESP_OK(gdma_del_link_list(tx_link_list));
     TEST_ESP_OK(gdma_del_link_list(rx_link_list));
     TEST_ESP_OK(gdma_del_channel(tx_chan));
     TEST_ESP_OK(gdma_del_channel(rx_chan));
     vSemaphoreDelete(done_sem);
+    free(stash_buffer);
 }
 
 TEST_CASE("GDMA M2M Unaligned RX Buffer Test", "[GDMA][M2M]")
@@ -473,29 +461,28 @@ TEST_CASE("GDMA M2M Unaligned RX Buffer Test", "[GDMA][M2M]")
     uint8_t *sbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
     uint8_t *dbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
 
-    size_t split_alignment = 64;
     // case buffer len less than buffer alignment
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 0, split_alignment);
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 4, split_alignment);
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 2, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 0);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 4);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 2);
 
     // case buffer head aligned
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 246, 0, split_alignment);
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8182, 0, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 246, 0);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8182, 0);
 
     // case buffer tail aligned
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 246, 10, split_alignment);
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8182, 10, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 246, 10);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8182, 10);
 
     // case buffer unaligned
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 100, 10, split_alignment);
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 10, 60, split_alignment);
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 256, 10, split_alignment);
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8192, 10, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 100, 10);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 10, 60);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 256, 10);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8192, 10);
 
     // case buffer full aligned
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 256, 0, split_alignment);
-    test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8192, 0, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 256, 0);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8192, 0);
 
     free(sbuf);
     free(dbuf);