diff --git a/components/esp_hw_support/dma/esp_dma_utils.c b/components/esp_hw_support/dma/esp_dma_utils.c
index 0c1f0df1af..518cb0c5f9 100644
--- a/components/esp_hw_support/dma/esp_dma_utils.c
+++ b/components/esp_hw_support/dma/esp_dma_utils.c
@@ -13,15 +13,81 @@
 #include "esp_heap_caps.h"
 #include "esp_memory_utils.h"
 #include "esp_dma_utils.h"
+#include "esp_private/esp_dma_utils.h"
 #include "esp_private/esp_cache_private.h"
 #include "soc/soc_caps.h"
 #include "hal/hal_utils.h"
+#include "hal/cache_hal.h"
+#include "hal/cache_ll.h"
+#include "esp_cache.h"
 
 static const char *TAG = "dma_utils";
 
 #define ALIGN_UP_BY(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
 #define ALIGN_DOWN_BY(num, align) ((num) & (~((align) - 1)))
 
+esp_err_t esp_dma_split_buffer_to_aligned(void *input_buffer, size_t input_buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array)
+{
+    esp_err_t ret = ESP_OK;
+    // split_alignment must be a non-zero power of two, and the stash must hold one alignment unit each for the head and the tail
+    ESP_RETURN_ON_FALSE(align_array && input_buffer && input_buffer_len && stash_buffer && split_alignment && !(split_alignment & (split_alignment - 1))
+                        && (stash_buffer_len >= 2 * split_alignment), ESP_ERR_INVALID_ARG, TAG, "invalid argument");
+    ESP_RETURN_ON_FALSE(!((uintptr_t)stash_buffer % split_alignment), ESP_ERR_INVALID_ARG, TAG, "extra buffer is not aligned");
+
+    // calculate head_overflow_len: number of bytes from input_buffer up to the next alignment boundary
+    size_t head_overflow_len = (uintptr_t)input_buffer % split_alignment;
+    head_overflow_len = head_overflow_len ? split_alignment - head_overflow_len : 0;
+    ESP_LOGD(TAG, "head_addr:%p split_alignment:%zu head_overflow_len:%zu", input_buffer, split_alignment, head_overflow_len);
+    // calculate tail_overflow_len: number of bytes after the last alignment boundary inside the buffer
+    size_t tail_overflow_len = ((uintptr_t)input_buffer + input_buffer_len) % split_alignment;
+    ESP_LOGD(TAG, "tail_addr:%p split_alignment:%zu tail_overflow_len:%zu", input_buffer + input_buffer_len - tail_overflow_len, split_alignment, tail_overflow_len);
+
+    uint32_t extra_buf_count = 0;
+    input_buffer = (uint8_t *)input_buffer;
+    stash_buffer = (uint8_t *)stash_buffer;
+    align_array->buf.head.recovery_address = input_buffer;
+    align_array->buf.head.aligned_buffer = stash_buffer + split_alignment * extra_buf_count++;
+    align_array->buf.head.length = head_overflow_len;
+    align_array->buf.body.recovery_address = input_buffer + head_overflow_len;
+    align_array->buf.body.aligned_buffer = input_buffer + head_overflow_len;
+    align_array->buf.body.length = input_buffer_len - head_overflow_len - tail_overflow_len;
+    align_array->buf.tail.recovery_address = input_buffer + input_buffer_len - tail_overflow_len;
+    align_array->buf.tail.aligned_buffer = stash_buffer + split_alignment * extra_buf_count++;
+    align_array->buf.tail.length = tail_overflow_len;
+
+    // special handling when the input buffer does not cross an alignment boundary: everything goes through the head stash
+    if (head_overflow_len >= input_buffer_len || tail_overflow_len >= input_buffer_len) {
+        align_array->buf.head.length = input_buffer_len;
+        align_array->buf.body.length = 0;
+        align_array->buf.tail.length = 0;
+    }
+
+    for (int i = 0; i < 3; i++) {
+        if (!align_array->aligned_buffer[i].length) {
+            align_array->aligned_buffer[i].aligned_buffer = NULL;
+            align_array->aligned_buffer[i].recovery_address = NULL;
+        }
+    }
+
+    return ret;
+}
+
+esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array)
+{
+    esp_err_t ret = ESP_OK;
+    ESP_RETURN_ON_FALSE(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
+
+    // only need to copy the head and tail buffer, the body already lives in the original buffer
+    if (align_array->buf.head.length) {
+        memcpy(align_array->buf.head.recovery_address, align_array->buf.head.aligned_buffer, align_array->buf.head.length);
+    }
+    if (align_array->buf.tail.length) {
+        memcpy(align_array->buf.tail.recovery_address, align_array->buf.tail.aligned_buffer, align_array->buf.tail.length);
+    }
+
+    return ret;
+}
+
 esp_err_t esp_dma_capable_malloc(size_t size, const esp_dma_mem_info_t *dma_mem_info, void **out_ptr, size_t *actual_size)
 {
     ESP_RETURN_ON_FALSE_ISR(dma_mem_info && out_ptr, ESP_ERR_INVALID_ARG, TAG, "null pointer");
diff --git a/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h b/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h
new file mode 100644
index 0000000000..b9ed67e93e
--- /dev/null
+++ b/components/esp_hw_support/dma/include/esp_private/esp_dma_utils.h
@@ -0,0 +1,85 @@
+/*
+ * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include "esp_err.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief DMA buffer information
+ */
+typedef struct {
+    void *aligned_buffer;    //!< Buffer address handed to the DMA
+    void *recovery_address;  //!< Address in the original buffer that the aligned buffer is copied back to
+    size_t length;           //!< Buffer length
+} dma_buffer_split_info_t;
+
+/**
+ * @brief DMA buffer aligned array
+ */
+typedef struct {
+    union {
+        struct {
+            dma_buffer_split_info_t head;  //!< Aligned head part. Corresponds to the part of the original buffer where the head is not aligned
+            dma_buffer_split_info_t body;  //!< Aligned body part. Corresponds to the part of the original buffer that is already aligned
+            dma_buffer_split_info_t tail;  //!< Aligned tail part. Corresponds to the part of the original buffer where the tail is not aligned
+        } buf;
+        dma_buffer_split_info_t aligned_buffer[3];  //!< DMA aligned buffer array, consists of `head`, `body` and `tail`
+    };
+} dma_buffer_split_array_t;
+
+/**
+ * @brief Split unaligned DMA buffer to aligned DMA buffer or aligned DMA buffer array
+ *
+ * @note Returned align array contains three parts: head, body and tail. Length of each buffer will be >=0, length 0 means that there is no such part
+ *
+ * @param[in]  buffer            Original DMA buffer address
+ * @param[in]  buffer_len        Original DMA buffer length
+ * @param[in]  stash_buffer      Needed extra buffer to stash aligned buffer, should be allocated with DMA capable memory and aligned to split_alignment
+ * @param[in]  stash_buffer_len  stash_buffer length, must be at least 2 * split_alignment
+ * @param[in]  split_alignment   Alignment of each buffer required by the DMA, must be a power of two
+ * @param[out] align_array       Aligned DMA buffer array
+ * @return
+ *      - ESP_OK: Split to aligned buffer successfully
+ *      - ESP_ERR_INVALID_ARG: Split to aligned buffer failed because of invalid argument
+ *
+ * brief sketch:
+ *               buffer alignment delimiter    buffer alignment delimiter
+ *                            │                             │
+ *  Origin Buffer             │        Origin Buffer        │
+ *        │                   │              │              │
+ *        │                   ▼              ▼              ▼
+ *        │        ...---xxxxx|xxxxxxxxxxxxxxxxxxxxxxxxxxxxx|xxxxx----...
+ *        │                │                 │                 │
+ *        │                │                 ▼                 │
+ *        │                │  |xxxxxxxxxxxxxxxxxxxxxxxxxxxxx|  │
+ *        │                │                 ▲                 │
+ *        ▼                │                 │                 │
+ * Aligned buffers         └──► Head       Body    Tail ◄──────┘
+ *                               │                   │
+ *                               ▼                   ▼
+ *                         |xxxxx......|       |xxxxx......|
+ */
+esp_err_t esp_dma_split_buffer_to_aligned(void *buffer, size_t buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array);
+
+/**
+ * @brief Merge aligned buffer array to origin buffer
+ *
+ * @param[in] align_array Aligned DMA buffer array
+ * @return
+ *      - ESP_OK: Merge aligned buffer to origin buffer successfully
+ *      - ESP_ERR_INVALID_ARG: Merge aligned buffer to origin buffer failed because of invalid argument
+ */
+esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/components/esp_hw_support/test_apps/dma/main/test_gdma.c b/components/esp_hw_support/test_apps/dma/main/test_gdma.c
index b5638f6f5e..abaf59a7ed 100644
--- a/components/esp_hw_support/test_apps/dma/main/test_gdma.c
+++ b/components/esp_hw_support/test_apps/dma/main/test_gdma.c
@@ -14,6 +14,7 @@
 #include "esp_heap_caps.h"
 #include "esp_private/gdma.h"
 #include "esp_private/gdma_link.h"
+#include "esp_private/esp_dma_utils.h"
 #include "hal/dma_types.h"
 #include "soc/soc_caps.h"
 #include "hal/gdma_ll.h"
@@ -22,6 +23,9 @@
 #include "esp_cache.h"
 #include "esp_memory_utils.h"
 
+#define ALIGN_UP(num, align)    (((num) + ((align) - 1)) & ~((align) - 1))
+#define ALIGN_DOWN(num, align)  ((num) & ~((align) - 1))
+
 TEST_CASE("GDMA channel allocation", "[GDMA]")
 {
     gdma_channel_alloc_config_t channel_config = {};
@@ -147,22 +151,9 @@ TEST_CASE("GDMA channel allocation", "[GDMA]")
 #endif // GDMA_LL_AXI_PAIRS_PER_GROUP >= 2
 }
 
-static bool test_gdma_m2m_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+static void test_gdma_config_link_list(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan,
+                                       gdma_link_list_handle_t *tx_link_list, gdma_link_list_handle_t *rx_link_list, size_t sram_alignment, bool dma_link_in_ext_mem)
 {
-    BaseType_t task_woken = pdFALSE;
-    SemaphoreHandle_t done_sem = (SemaphoreHandle_t)user_data;
-    xSemaphoreGiveFromISR(done_sem, &task_woken);
-    return task_woken == pdTRUE;
-}
-
-static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan, bool dma_link_in_ext_mem)
-{
-    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
-    gdma_rx_event_callbacks_t rx_cbs = {
-        .on_recv_eof = test_gdma_m2m_rx_eof_callback,
-    };
-    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
-    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, done_sem));
 
     gdma_strategy_config_t strategy = {
         .auto_update_desc = true,
@@ -189,24 +180,46 @@ static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handl
             .check_owner = true,
         }
     };
-    gdma_link_list_handle_t tx_link_list = NULL;
-    TEST_ESP_OK(gdma_new_link_list(&tx_link_list_config, &tx_link_list));
-    // allocate the source buffer from SRAM
-    uint8_t *src_data = heap_caps_calloc(1, 128, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
-    TEST_ASSERT_NOT_NULL(src_data);
-
+    TEST_ESP_OK(gdma_new_link_list(&tx_link_list_config, tx_link_list));
     // create DMA link list for RX channel
     gdma_link_list_config_t rx_link_list_config = {
         .buffer_alignment = sram_alignment, // RX buffer should be aligned to the cache line size, because we will do cache invalidate later
         .item_alignment = 8, // 8-byte alignment required by the AXI-GDMA
-        .num_items = 1,
+        .num_items = 5,
         .flags = {
             .items_in_ext_mem = dma_link_in_ext_mem,
             .check_owner = true,
         },
     };
+    TEST_ESP_OK(gdma_new_link_list(&rx_link_list_config, rx_link_list));
+}
+
+static bool test_gdma_m2m_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+{
+    BaseType_t task_woken = pdFALSE;
+    SemaphoreHandle_t done_sem = (SemaphoreHandle_t)user_data;
+    xSemaphoreGiveFromISR(done_sem, &task_woken);
+    return task_woken == pdTRUE;
+}
+
+static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan, bool dma_link_in_ext_mem)
+{
+    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
+    gdma_rx_event_callbacks_t rx_cbs = {
+        .on_recv_eof = test_gdma_m2m_rx_eof_callback,
+    };
+    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
+    TEST_ASSERT_NOT_NULL(done_sem);
+    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, done_sem));
+
+    gdma_link_list_handle_t tx_link_list = NULL;
     gdma_link_list_handle_t rx_link_list = NULL;
-    TEST_ESP_OK(gdma_new_link_list(&rx_link_list_config, &rx_link_list));
+    test_gdma_config_link_list(tx_chan, rx_chan, &tx_link_list, &rx_link_list, sram_alignment, dma_link_in_ext_mem);
+
+    // allocate the source buffer from SRAM
+    uint8_t *src_data = heap_caps_calloc(1, 128, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    TEST_ASSERT_NOT_NULL(src_data);
+
     // allocate the destination buffer from SRAM
     uint8_t *dst_data = heap_caps_calloc(1, 256, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
     TEST_ASSERT_NOT_NULL(dst_data);
@@ -270,7 +283,7 @@ static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handl
     TEST_ESP_OK(gdma_start(rx_chan, gdma_link_get_head_addr(rx_link_list)));
     TEST_ESP_OK(gdma_start(tx_chan, gdma_link_get_head_addr(tx_link_list)));
 
-    xSemaphoreTake(done_sem, portMAX_DELAY);
+    TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(done_sem, 1000 / portTICK_PERIOD_MS));
 
     if (sram_alignment) {
         // the destination data are not reflected to the cache, so do an invalidate to ask the cache load new data
@@ -344,3 +357,150 @@ TEST_CASE("GDMA M2M Mode", "[GDMA][M2M]")
     TEST_ESP_OK(gdma_del_channel(rx_chan));
 #endif // SOC_AXI_GDMA_SUPPORTED
 }
+
+// Context shared between the test task and the RX EOF callback
+typedef struct {
+    SemaphoreHandle_t done_sem;
+    dma_buffer_split_array_t *align_array;
+    size_t split_alignment;
+    bool need_invalidate;
+} test_gdma_context_t;
+
+// RX EOF callback: sync the cache over the DMA-written buffers (if needed), copy the stashed head/tail back, then wake the test task
+static bool test_gdma_m2m_unaligned_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+{
+    BaseType_t task_woken = pdFALSE;
+    test_gdma_context_t *user_ctx = (test_gdma_context_t *)user_data;
+    for (int i = 0; i < 3; i++) {
+        if (user_ctx->align_array->aligned_buffer[i].aligned_buffer && user_ctx->need_invalidate) {
+            TEST_ESP_OK(esp_cache_msync(user_ctx->align_array->aligned_buffer[i].aligned_buffer, ALIGN_UP(user_ctx->align_array->aligned_buffer[i].length, user_ctx->split_alignment), ESP_CACHE_MSYNC_FLAG_DIR_M2C));
+        }
+    }
+    TEST_ESP_OK(esp_dma_merge_aligned_buffers(user_ctx->align_array));
+    xSemaphoreGiveFromISR(user_ctx->done_sem, &task_woken);
+    return task_woken == pdTRUE;
+}
+
+// Run one M2M transfer of data_length bytes from src_data into dst_data + offset_len, splitting the RX buffer by split_alignment
+static void test_gdma_m2m_unaligned_buffer_test(uint8_t *dst_data, uint8_t *src_data, size_t data_length, size_t offset_len, size_t split_alignment)
+{
+    TEST_ASSERT_NOT_NULL(src_data);
+    TEST_ASSERT_NOT_NULL(dst_data);
+    gdma_channel_handle_t tx_chan = NULL;
+    gdma_channel_handle_t rx_chan = NULL;
+    gdma_channel_alloc_config_t tx_chan_alloc_config = {};
+    gdma_channel_alloc_config_t rx_chan_alloc_config = {};
+    tx_chan_alloc_config = (gdma_channel_alloc_config_t) {
+        .direction = GDMA_CHANNEL_DIRECTION_TX,
+        .flags.reserve_sibling = true,
+    };
+    TEST_ESP_OK(gdma_new_ahb_channel(&tx_chan_alloc_config, &tx_chan));
+    rx_chan_alloc_config = (gdma_channel_alloc_config_t) {
+        .direction = GDMA_CHANNEL_DIRECTION_RX,
+        .sibling_chan = tx_chan,
+    };
+    TEST_ESP_OK(gdma_new_ahb_channel(&rx_chan_alloc_config, &rx_chan));
+    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
+
+    gdma_link_list_handle_t tx_link_list = NULL;
+    gdma_link_list_handle_t rx_link_list = NULL;
+    test_gdma_config_link_list(tx_chan, rx_chan, &tx_link_list, &rx_link_list, sram_alignment, false);
+
+    // prepare the source data
+    for (size_t i = 0; i < data_length; i++) {
+        src_data[i] = (uint8_t)i;
+    }
+    if (sram_alignment) {
+        // do write-back for the source data because it's in the cache
+        TEST_ESP_OK(esp_cache_msync(src_data, ALIGN_UP(data_length, sram_alignment), ESP_CACHE_MSYNC_FLAG_DIR_C2M));
+    }
+
+    gdma_buffer_mount_config_t tx_buf_mount_config[] = {
+        [0] = {
+            .buffer = src_data,
+            .length = data_length,
+            .flags = {
+                .mark_eof = true,
+                .mark_final = true, // using singly list, so terminate the link here
+            }
+        }
+    };
+    TEST_ESP_OK(gdma_link_mount_buffers(tx_link_list, 0, tx_buf_mount_config, sizeof(tx_buf_mount_config) / sizeof(gdma_buffer_mount_config_t), NULL));
+
+    // allocate stash_buffer, should be freed by the user
+    void *stash_buffer = heap_caps_aligned_calloc(split_alignment, 2, split_alignment, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    TEST_ASSERT_NOT_NULL(stash_buffer);
+    size_t stash_buffer_len = 2 * split_alignment;
+    dma_buffer_split_array_t align_array = {0};
+    gdma_buffer_mount_config_t rx_aligned_buf_mount_config[3] = {0};
+
+    TEST_ESP_OK(esp_dma_split_buffer_to_aligned(dst_data + offset_len, data_length, stash_buffer, stash_buffer_len, split_alignment, &align_array));
+    for (int i = 0; i < 3; i++) {
+        rx_aligned_buf_mount_config[i].buffer = align_array.aligned_buffer[i].aligned_buffer;
+        rx_aligned_buf_mount_config[i].length = align_array.aligned_buffer[i].length;
+    }
+    TEST_ESP_OK(gdma_link_mount_buffers(rx_link_list, 0, rx_aligned_buf_mount_config, 3, NULL));
+
+    gdma_rx_event_callbacks_t rx_cbs = {
+        .on_recv_eof = test_gdma_m2m_unaligned_rx_eof_callback,
+    };
+    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
+    TEST_ASSERT_NOT_NULL(done_sem);
+    test_gdma_context_t user_ctx = {
+        .done_sem = done_sem,
+        .align_array = &align_array,
+        .split_alignment = split_alignment,
+        .need_invalidate = sram_alignment ? true : false,
+    };
+    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, &user_ctx));
+
+    TEST_ESP_OK(gdma_start(rx_chan, gdma_link_get_head_addr(rx_link_list)));
+    TEST_ESP_OK(gdma_start(tx_chan, gdma_link_get_head_addr(tx_link_list)));
+
+    TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(done_sem, 1000 / portTICK_PERIOD_MS));
+
+    // validate the destination data
+    for (size_t i = 0; i < data_length; i++) {
+        TEST_ASSERT_EQUAL(i % 256, dst_data[i + offset_len]);
+    }
+
+    free(stash_buffer);
+    TEST_ESP_OK(gdma_del_link_list(tx_link_list));
+    TEST_ESP_OK(gdma_del_link_list(rx_link_list));
+    TEST_ESP_OK(gdma_del_channel(tx_chan));
+    TEST_ESP_OK(gdma_del_channel(rx_chan));
+    vSemaphoreDelete(done_sem);
+}
+
+TEST_CASE("GDMA M2M Unaligned RX Buffer Test", "[GDMA][M2M]")
+{
+    uint8_t *sbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    uint8_t *dbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+
+    size_t split_alignment = 64;
+    // case buffer len less than buffer alignment
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 0, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 4, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 2, split_alignment);
+
+    // case buffer head aligned
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 246, 0, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8182, 0, split_alignment);
+
+    // case buffer tail aligned
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 246, 10, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8182, 10, split_alignment);
+
+    // case buffer unaligned
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 100, 10, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 10, 60, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 256, 10, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8192, 10, split_alignment);
+
+    // case buffer full aligned
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 256, 0, split_alignment);
+    test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8192, 0, split_alignment);
+
+    free(sbuf);
+    free(dbuf);
+}