Merge branch 'feat/dma_cache_align' into 'master'

feat(dma): Add helper functions to align buffer

Closes IDF-10462

See merge request espressif/esp-idf!32343
Author: Chen Ji Chang
Date:   2024-08-14 10:00:55 +08:00
3 changed files with 331 additions and 24 deletions

@@ -13,15 +13,81 @@
#include "esp_heap_caps.h"
#include "esp_memory_utils.h"
#include "esp_dma_utils.h"
#include "esp_private/esp_dma_utils.h"
#include "esp_private/esp_cache_private.h"
#include "soc/soc_caps.h"
#include "hal/hal_utils.h"
#include "hal/cache_hal.h"
#include "hal/cache_ll.h"
#include "esp_cache.h"
static const char *TAG = "dma_utils";
#define ALIGN_UP_BY(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
#define ALIGN_DOWN_BY(num, align) ((num) & (~((align) - 1)))
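/*
 * Worked example (illustrative): with a power-of-two alignment of 64,
 * ALIGN_UP_BY(100, 64) = (100 + 63) & ~63 = 128 and ALIGN_DOWN_BY(100, 64) = 100 & ~63 = 64.
 * Both macros assume `align` is a power of two.
 */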
esp_err_t esp_dma_split_buffer_to_aligned(void *input_buffer, size_t input_buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array)
{
    esp_err_t ret = ESP_OK;
    ESP_RETURN_ON_FALSE(align_array && input_buffer && input_buffer_len && stash_buffer && split_alignment && !(split_alignment & (split_alignment - 1))
                        && (stash_buffer_len >= 2 * split_alignment), ESP_ERR_INVALID_ARG, TAG, "invalid argument");
    ESP_RETURN_ON_FALSE(!((uintptr_t)stash_buffer % split_alignment), ESP_ERR_INVALID_ARG, TAG, "stash buffer is not aligned");

    uint8_t *input_bytes = (uint8_t *)input_buffer;
    uint8_t *stash_bytes = (uint8_t *)stash_buffer;

    // calculate head_overflow_len: bytes from the buffer start up to the next alignment boundary
    size_t head_overflow_len = (uintptr_t)input_buffer % split_alignment;
    head_overflow_len = head_overflow_len ? split_alignment - head_overflow_len : 0;
    ESP_LOGD(TAG, "head_addr:%p split_alignment:%zu head_overflow_len:%zu", input_buffer, split_alignment, head_overflow_len);
    // calculate tail_overflow_len: bytes past the last alignment boundary inside the buffer
    size_t tail_overflow_len = ((uintptr_t)input_buffer + input_buffer_len) % split_alignment;
    ESP_LOGD(TAG, "tail_addr:%p split_alignment:%zu tail_overflow_len:%zu", input_bytes + input_buffer_len - tail_overflow_len, split_alignment, tail_overflow_len);

    // the head and tail parts are redirected into the stash buffer, one alignment unit each; the body stays in place
    uint32_t extra_buf_count = 0;
    align_array->buf.head.recovery_address = input_bytes;
    align_array->buf.head.aligned_buffer = stash_bytes + split_alignment * extra_buf_count++;
    align_array->buf.head.length = head_overflow_len;
    align_array->buf.body.recovery_address = input_bytes + head_overflow_len;
    align_array->buf.body.aligned_buffer = input_bytes + head_overflow_len;
    align_array->buf.body.length = input_buffer_len - head_overflow_len - tail_overflow_len;
    align_array->buf.tail.recovery_address = input_bytes + input_buffer_len - tail_overflow_len;
    align_array->buf.tail.aligned_buffer = stash_bytes + split_alignment * extra_buf_count++;
    align_array->buf.tail.length = tail_overflow_len;

    // special handling when the input buffer is no longer than the alignment: the whole buffer becomes the head part
    if (head_overflow_len >= input_buffer_len || tail_overflow_len >= input_buffer_len) {
        align_array->buf.head.length = input_buffer_len;
        align_array->buf.body.length = 0;
        align_array->buf.tail.length = 0;
    }

    // zero-length parts get NULL pointers so callers can skip them
    for (int i = 0; i < 3; i++) {
        if (!align_array->aligned_buffer[i].length) {
            align_array->aligned_buffer[i].aligned_buffer = NULL;
            align_array->aligned_buffer[i].recovery_address = NULL;
        }
    }
    return ret;
}
esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array)
{
    esp_err_t ret = ESP_OK;
    ESP_RETURN_ON_FALSE(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
    // only the head and tail parts were redirected to the stash buffer, so only they need to be copied back
    if (align_array->buf.head.length) {
        memcpy(align_array->buf.head.recovery_address, align_array->buf.head.aligned_buffer, align_array->buf.head.length);
    }
    if (align_array->buf.tail.length) {
        memcpy(align_array->buf.tail.recovery_address, align_array->buf.tail.aligned_buffer, align_array->buf.tail.length);
    }
    return ret;
}
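/*
 * Intended usage of the two helpers above (sketch): call esp_dma_split_buffer_to_aligned()
 * on the unaligned buffer, program the DMA with the up-to-three aligned parts, and after the
 * transfer completes (and, on cache-enabled targets, after invalidating the aligned parts)
 * call esp_dma_merge_aligned_buffers() to copy the stashed head/tail back into the original buffer.
 */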
esp_err_t esp_dma_capable_malloc(size_t size, const esp_dma_mem_info_t *dma_mem_info, void **out_ptr, size_t *actual_size)
{
    ESP_RETURN_ON_FALSE_ISR(dma_mem_info && out_ptr, ESP_ERR_INVALID_ARG, TAG, "null pointer");

@@ -0,0 +1,85 @@
/*
* SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <stdbool.h>
#include "esp_err.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief DMA buffer information
 */
typedef struct {
    void *aligned_buffer;   //!< Aligned buffer address
    void *recovery_address; //!< Original buffer address that the aligned part should be recovered to
    size_t length;          //!< Buffer length
} dma_buffer_split_info_t;
/**
 * @brief DMA buffer aligned array
 */
typedef struct {
    union {
        struct {
            dma_buffer_split_info_t head; //!< Aligned head part, corresponding to the unaligned bytes at the start of the original buffer
            dma_buffer_split_info_t body; //!< Aligned body part, corresponding to the already-aligned middle of the original buffer
            dma_buffer_split_info_t tail; //!< Aligned tail part, corresponding to the unaligned bytes at the end of the original buffer
        } buf;
        dma_buffer_split_info_t aligned_buffer[3]; //!< DMA-aligned buffer array, consisting of `head`, `body` and `tail`
    };
} dma_buffer_split_array_t;
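/*
 * Worked example (illustrative numbers): splitting a buffer at address 0x...04 with
 * length 200 and split_alignment 64 yields head = 60 bytes (up to the first 64-byte
 * boundary), body = 128 bytes (the aligned middle, left in place), and tail = 12 bytes
 * (past the last boundary). Head and tail point into the stash buffer.
 */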
/**
 * @brief Split an unaligned DMA buffer into an array of aligned DMA buffers
 *
 * @note The returned align array contains three parts: head, body and tail.
 *       Each part's length is >= 0; a length of 0 means that the part does not exist.
 *
 * @param[in] buffer Original DMA buffer address
 * @param[in] buffer_len Original DMA buffer length
 * @param[in] stash_buffer Extra buffer used to stash the unaligned head and tail parts; must be allocated from DMA-capable memory and aligned to split_alignment
 * @param[in] stash_buffer_len stash_buffer length, at least 2 * split_alignment
 * @param[in] split_alignment Alignment of each buffer required by the DMA
 * @param[out] align_array Aligned DMA buffer array
 * @return
 *     - ESP_OK: Buffer was split into aligned parts successfully
 *     - ESP_ERR_INVALID_ARG: Failed to split because of an invalid argument
 *
 * brief sketch:
 *                  buffer alignment delimiter                 buffer alignment delimiter
 *                          │                                          │
 * Origin Buffer            │            Origin Buffer                 │
 *      │                   │                 │                        │
 *      │                   ▼                 ▼                        ▼
 *      │         ...---xxxxx|xxxxxxxxxxxxxxxxxxxxxxxxxxxxx|xxxxx----...
 *      │                   │                 │                        │
 *      │                   │                 ▼                        │
 *      │                   │  |xxxxxxxxxxxxxxxxxxxxxxxxxxxxx|        │
 *      │                   │                 ▲                        │
 *      ▼                   │                 │                        │
 * Aligned buffers          └──► Head        Body        Tail ◄───────┘
 *                                │                        │
 *                                ▼                        ▼
 *                           |xxxxx......|           |xxxxx......|
 */
esp_err_t esp_dma_split_buffer_to_aligned(void *buffer, size_t buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array);
/**
 * @brief Merge the aligned buffer array back into the original buffer
 *
 * @param[in] align_array Aligned DMA buffer array
 * @return
 *     - ESP_OK: Aligned buffers merged back into the original buffer successfully
 *     - ESP_ERR_INVALID_ARG: Failed to merge because of an invalid argument
 */
esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array);
#ifdef __cplusplus
}
#endif
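For orientation, here is a minimal usage sketch of the two helpers above (illustrative only, not part of this merge request): my_dma_receive is a hypothetical stand-in for whatever driver call actually performs the transfer, and the 64-byte split_alignment is an assumed cache-line/DMA alignment; the MALLOC_CAP flags mirror the unit test below.

#include <stdint.h>
#include <stdlib.h>
#include "esp_heap_caps.h"
#include "esp_private/esp_dma_utils.h"

// hypothetical driver call that DMAs `len` bytes into `buf`
extern void my_dma_receive(void *buf, size_t len);

static esp_err_t receive_into_unaligned(uint8_t *dst, size_t len)
{
    const size_t split_alignment = 64; // assumed DMA/cache alignment
    // stash buffer: one alignment unit each for the head and tail parts
    void *stash = heap_caps_aligned_calloc(split_alignment, 2, split_alignment,
                                           MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
    if (!stash) {
        return ESP_ERR_NO_MEM;
    }
    dma_buffer_split_array_t align_array = {0};
    esp_err_t err = esp_dma_split_buffer_to_aligned(dst, len, stash, 2 * split_alignment,
                                                    split_alignment, &align_array);
    if (err == ESP_OK) {
        for (int i = 0; i < 3; i++) {
            if (align_array.aligned_buffer[i].length) {
                my_dma_receive(align_array.aligned_buffer[i].aligned_buffer,
                               align_array.aligned_buffer[i].length);
            }
        }
        // on cache-enabled targets, invalidate the aligned parts here
        // (ESP_CACHE_MSYNC_FLAG_DIR_M2C) before merging, as the unit test does
        err = esp_dma_merge_aligned_buffers(&align_array);
    }
    free(stash);
    return err;
}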

@@ -14,6 +14,7 @@
#include "esp_heap_caps.h"
#include "esp_private/gdma.h"
#include "esp_private/gdma_link.h"
#include "esp_private/esp_dma_utils.h"
#include "hal/dma_types.h"
#include "soc/soc_caps.h"
#include "hal/gdma_ll.h"
@@ -22,6 +23,9 @@
#include "esp_cache.h"
#include "esp_memory_utils.h"
#define ALIGN_UP(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
#define ALIGN_DOWN(num, align) ((num) & ~((align) - 1))
TEST_CASE("GDMA channel allocation", "[GDMA]")
{
    gdma_channel_alloc_config_t channel_config = {};
@@ -147,22 +151,9 @@ TEST_CASE("GDMA channel allocation", "[GDMA]")
 #endif // GDMA_LL_AXI_PAIRS_PER_GROUP >= 2
 }
-static bool test_gdma_m2m_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+static void test_gdma_config_link_list(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan,
+                                       gdma_link_list_handle_t *tx_link_list, gdma_link_list_handle_t *rx_link_list, size_t sram_alignment, bool dma_link_in_ext_mem)
 {
-    BaseType_t task_woken = pdFALSE;
-    SemaphoreHandle_t done_sem = (SemaphoreHandle_t)user_data;
-    xSemaphoreGiveFromISR(done_sem, &task_woken);
-    return task_woken == pdTRUE;
-}
-static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan, bool dma_link_in_ext_mem)
-{
-    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
-    gdma_rx_event_callbacks_t rx_cbs = {
-        .on_recv_eof = test_gdma_m2m_rx_eof_callback,
-    };
-    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
-    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, done_sem));
     gdma_strategy_config_t strategy = {
         .auto_update_desc = true,
@@ -189,24 +180,46 @@ static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handl
             .check_owner = true,
         }
     };
-    gdma_link_list_handle_t tx_link_list = NULL;
-    TEST_ESP_OK(gdma_new_link_list(&tx_link_list_config, &tx_link_list));
-    // allocate the source buffer from SRAM
-    uint8_t *src_data = heap_caps_calloc(1, 128, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
-    TEST_ASSERT_NOT_NULL(src_data);
+    TEST_ESP_OK(gdma_new_link_list(&tx_link_list_config, tx_link_list));
     // create DMA link list for RX channel
     gdma_link_list_config_t rx_link_list_config = {
         .buffer_alignment = sram_alignment, // RX buffer should be aligned to the cache line size, because we will do cache invalidate later
         .item_alignment = 8, // 8-byte alignment required by the AXI-GDMA
-        .num_items = 1,
+        .num_items = 5,
         .flags = {
             .items_in_ext_mem = dma_link_in_ext_mem,
             .check_owner = true,
         },
     };
+    TEST_ESP_OK(gdma_new_link_list(&rx_link_list_config, rx_link_list));
+}
+static bool test_gdma_m2m_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
+{
+    BaseType_t task_woken = pdFALSE;
+    SemaphoreHandle_t done_sem = (SemaphoreHandle_t)user_data;
+    xSemaphoreGiveFromISR(done_sem, &task_woken);
+    return task_woken == pdTRUE;
+}
+static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handle_t rx_chan, bool dma_link_in_ext_mem)
+{
+    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
+    gdma_rx_event_callbacks_t rx_cbs = {
+        .on_recv_eof = test_gdma_m2m_rx_eof_callback,
+    };
+    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
+    TEST_ASSERT_NOT_NULL(done_sem);
+    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, done_sem));
+    gdma_link_list_handle_t tx_link_list = NULL;
+    gdma_link_list_handle_t rx_link_list = NULL;
-    TEST_ESP_OK(gdma_new_link_list(&rx_link_list_config, &rx_link_list));
+    test_gdma_config_link_list(tx_chan, rx_chan, &tx_link_list, &rx_link_list, sram_alignment, dma_link_in_ext_mem);
+    // allocate the source buffer from SRAM
+    uint8_t *src_data = heap_caps_calloc(1, 128, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    TEST_ASSERT_NOT_NULL(src_data);
     // allocate the destination buffer from SRAM
     uint8_t *dst_data = heap_caps_calloc(1, 256, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
     TEST_ASSERT_NOT_NULL(dst_data);
@@ -270,7 +283,7 @@ static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handl
     TEST_ESP_OK(gdma_start(rx_chan, gdma_link_get_head_addr(rx_link_list)));
     TEST_ESP_OK(gdma_start(tx_chan, gdma_link_get_head_addr(tx_link_list)));
-    xSemaphoreTake(done_sem, portMAX_DELAY);
+    xSemaphoreTake(done_sem, 1000 / portTICK_PERIOD_MS);
     if (sram_alignment) {
         // the destination data are not reflected to the cache, so do an invalidate to ask the cache to load the new data
@@ -344,3 +357,146 @@ TEST_CASE("GDMA M2M Mode", "[GDMA][M2M]")
    TEST_ESP_OK(gdma_del_channel(rx_chan));
#endif // SOC_AXI_GDMA_SUPPORTED
}
typedef struct {
    SemaphoreHandle_t done_sem;
    dma_buffer_split_array_t *align_array;
    size_t split_alignment;
    bool need_invalidate;
} test_gdma_context_t;

static bool test_gdma_m2m_unaligned_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
{
    BaseType_t task_woken = pdFALSE;
    test_gdma_context_t *user_ctx = (test_gdma_context_t *)user_data;
    // invalidate the cache for each aligned part before reading it, so the CPU sees the data written by the DMA
    for (int i = 0; i < 3; i++) {
        if (user_ctx->align_array->aligned_buffer[i].aligned_buffer && user_ctx->need_invalidate) {
            TEST_ESP_OK(esp_cache_msync(user_ctx->align_array->aligned_buffer[i].aligned_buffer,
                                        ALIGN_UP(user_ctx->align_array->aligned_buffer[i].length, user_ctx->split_alignment),
                                        ESP_CACHE_MSYNC_FLAG_DIR_M2C));
        }
    }
    TEST_ESP_OK(esp_dma_merge_aligned_buffers(user_ctx->align_array));
    xSemaphoreGiveFromISR(user_ctx->done_sem, &task_woken);
    return task_woken == pdTRUE;
}
static void test_gdma_m2m_unaligned_buffer_test(uint8_t *dst_data, uint8_t *src_data, size_t data_length, size_t offset_len, size_t split_alignment)
{
    TEST_ASSERT_NOT_NULL(src_data);
    TEST_ASSERT_NOT_NULL(dst_data);
    gdma_channel_handle_t tx_chan = NULL;
    gdma_channel_handle_t rx_chan = NULL;
    gdma_channel_alloc_config_t tx_chan_alloc_config = {};
    gdma_channel_alloc_config_t rx_chan_alloc_config = {};
    tx_chan_alloc_config = (gdma_channel_alloc_config_t) {
        .direction = GDMA_CHANNEL_DIRECTION_TX,
        .flags.reserve_sibling = true,
    };
    TEST_ESP_OK(gdma_new_ahb_channel(&tx_chan_alloc_config, &tx_chan));
    rx_chan_alloc_config = (gdma_channel_alloc_config_t) {
        .direction = GDMA_CHANNEL_DIRECTION_RX,
        .sibling_chan = tx_chan,
    };
    TEST_ESP_OK(gdma_new_ahb_channel(&rx_chan_alloc_config, &rx_chan));
    size_t sram_alignment = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
    gdma_link_list_handle_t tx_link_list = NULL;
    gdma_link_list_handle_t rx_link_list = NULL;
    test_gdma_config_link_list(tx_chan, rx_chan, &tx_link_list, &rx_link_list, sram_alignment, false);
    // prepare the source data
    for (int i = 0; i < data_length; i++) {
        src_data[i] = i;
    }
    if (sram_alignment) {
        // do write-back for the source data because it's in the cache
        TEST_ESP_OK(esp_cache_msync(src_data, ALIGN_UP(data_length, sram_alignment), ESP_CACHE_MSYNC_FLAG_DIR_C2M));
    }
    gdma_buffer_mount_config_t tx_buf_mount_config[] = {
        [0] = {
            .buffer = src_data,
            .length = data_length,
            .flags = {
                .mark_eof = true,
                .mark_final = true, // singly linked list, so terminate the link here
            }
        }
    };
    TEST_ESP_OK(gdma_link_mount_buffers(tx_link_list, 0, tx_buf_mount_config, sizeof(tx_buf_mount_config) / sizeof(gdma_buffer_mount_config_t), NULL));
    // allocate stash_buffer, to be freed by the caller
    void *stash_buffer = heap_caps_aligned_calloc(split_alignment, 2, split_alignment, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
    TEST_ASSERT_NOT_NULL(stash_buffer);
    size_t stash_buffer_len = 2 * split_alignment;
    dma_buffer_split_array_t align_array = {0};
    gdma_buffer_mount_config_t rx_aligned_buf_mount_config[3] = {0};
    // split the (possibly unaligned) destination into aligned head/body/tail parts
    TEST_ESP_OK(esp_dma_split_buffer_to_aligned(dst_data + offset_len, data_length, stash_buffer, stash_buffer_len, split_alignment, &align_array));
    for (int i = 0; i < 3; i++) {
        rx_aligned_buf_mount_config[i].buffer = align_array.aligned_buffer[i].aligned_buffer;
        rx_aligned_buf_mount_config[i].length = align_array.aligned_buffer[i].length;
    }
    TEST_ESP_OK(gdma_link_mount_buffers(rx_link_list, 0, rx_aligned_buf_mount_config, 3, NULL));
    gdma_rx_event_callbacks_t rx_cbs = {
        .on_recv_eof = test_gdma_m2m_unaligned_rx_eof_callback,
    };
    SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
    TEST_ASSERT_NOT_NULL(done_sem);
    test_gdma_context_t user_ctx = {
        .done_sem = done_sem,
        .align_array = &align_array,
        .split_alignment = split_alignment,
        .need_invalidate = sram_alignment ? true : false,
    };
    TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, &user_ctx));
    TEST_ESP_OK(gdma_start(rx_chan, gdma_link_get_head_addr(rx_link_list)));
    TEST_ESP_OK(gdma_start(tx_chan, gdma_link_get_head_addr(tx_link_list)));
    xSemaphoreTake(done_sem, 1000 / portTICK_PERIOD_MS);
    // validate the destination data
    for (int i = 0; i < data_length; i++) {
        TEST_ASSERT_EQUAL(i % 256, dst_data[i + offset_len]);
    }
    free(stash_buffer);
    TEST_ESP_OK(gdma_del_link_list(tx_link_list));
    TEST_ESP_OK(gdma_del_link_list(rx_link_list));
    TEST_ESP_OK(gdma_del_channel(tx_chan));
    TEST_ESP_OK(gdma_del_channel(rx_chan));
    vSemaphoreDelete(done_sem);
}
TEST_CASE("GDMA M2M Unaligned RX Buffer Test", "[GDMA][M2M]")
{
uint8_t *sbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
uint8_t *dbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
size_t split_alignment = 64;
// case buffer len less than buffer alignment
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 0, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 4, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 2, split_alignment);
// case buffer head aligned
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 246, 0, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8182, 0, split_alignment);
// case buffer tail aligned
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 246, 10, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8182, 10, split_alignment);
// case buffer unaligned
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 100, 10, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 10, 60, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 256, 10, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8192, 10, split_alignment);
// case buffer full aligned
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 256, 0, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8192, 0, split_alignment);
free(sbuf);
free(dbuf);
}