diff --git a/components/esp_hw_support/dma/async_memcpy_cp_dma.c b/components/esp_hw_support/dma/async_memcpy_cp_dma.c index 975bb92c36..e86bf10a18 100644 --- a/components/esp_hw_support/dma/async_memcpy_cp_dma.c +++ b/components/esp_hw_support/dma/async_memcpy_cp_dma.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -48,7 +48,6 @@ typedef struct async_memcpy_transaction_t { /// @note - Number of transaction objects are determined by the backlog parameter typedef struct { async_memcpy_context_t parent; // Parent IO interface - size_t sram_trans_align; // DMA transfer alignment (both in size and address) for SRAM memory size_t max_single_dma_buffer; // max DMA buffer size by a single descriptor cp_dma_hal_context_t hal; // CPDMA hal intr_handle_t intr; // CPDMA interrupt handle @@ -90,7 +89,7 @@ esp_err_t esp_async_memcpy_install_cpdma(const async_memcpy_config_t *config, as uint32_t trans_queue_len = config->backlog ? 
config->backlog : DEFAULT_TRANSACTION_QUEUE_LENGTH; // allocate memory for transaction pool, aligned to 4 because the trans->eof_node requires that alignment mcp_dma->transaction_pool = heap_caps_aligned_calloc(4, trans_queue_len, sizeof(async_memcpy_transaction_t), - MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA); + MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA); ESP_GOTO_ON_FALSE(mcp_dma->transaction_pool, ESP_ERR_NO_MEM, err, TAG, "no mem for transaction pool"); // Init hal context @@ -111,8 +110,7 @@ esp_err_t esp_async_memcpy_install_cpdma(const async_memcpy_config_t *config, as // initialize other members portMUX_INITIALIZE(&mcp_dma->spin_lock); atomic_init(&mcp_dma->fsm, MCP_FSM_IDLE); - mcp_dma->sram_trans_align = config->sram_trans_align; - size_t trans_align = config->sram_trans_align; + size_t trans_align = config->dma_burst_size; mcp_dma->max_single_dma_buffer = trans_align ? ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align) : DMA_DESCRIPTOR_BUFFER_MAX_SIZE; mcp_dma->parent.del = mcp_cpdma_del; mcp_dma->parent.memcpy = mcp_cpdma_memcpy; @@ -240,12 +238,6 @@ static esp_err_t mcp_cpdma_memcpy(async_memcpy_context_t *ctx, void *dst, void * esp_err_t ret = ESP_OK; async_memcpy_cpdma_context_t *mcp_dma = __containerof(ctx, async_memcpy_cpdma_context_t, parent); ESP_RETURN_ON_FALSE(esp_ptr_internal(src) && esp_ptr_internal(dst), ESP_ERR_INVALID_ARG, TAG, "CP_DMA can only access SRAM"); - // alignment check - if (mcp_dma->sram_trans_align) { - ESP_RETURN_ON_FALSE((((intptr_t)dst & (mcp_dma->sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, TAG, "buffer address not aligned: %p -> %p", src, dst); - ESP_RETURN_ON_FALSE(((n & (mcp_dma->sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, TAG, - "copy size should align to %d bytes", mcp_dma->sram_trans_align); - } async_memcpy_transaction_t *trans = NULL; // pick one transaction node from idle queue trans = try_pop_trans_from_idle_queue(mcp_dma); @@ -257,12 +249,12 @@ static esp_err_t 
mcp_cpdma_memcpy(async_memcpy_context_t *ctx, void *dst, void * uint32_t num_desc_per_path = (n + max_single_dma_buffer - 1) / max_single_dma_buffer; // allocate DMA descriptors, descriptors need a strict alignment trans->tx_desc_link = heap_caps_aligned_calloc(4, num_desc_per_path, sizeof(dma_descriptor_align4_t), - MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA); + MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA); ESP_GOTO_ON_FALSE(trans->tx_desc_link, ESP_ERR_NO_MEM, err, TAG, "no mem for DMA descriptors"); // don't have to allocate the EOF descriptor, we will use trans->eof_node as the RX EOF descriptor if (num_desc_per_path > 1) { trans->rx_desc_link = heap_caps_aligned_calloc(4, num_desc_per_path - 1, sizeof(dma_descriptor_align4_t), - MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA); + MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT | MALLOC_CAP_DMA); ESP_GOTO_ON_FALSE(trans->rx_desc_link, ESP_ERR_NO_MEM, err, TAG, "no mem for DMA descriptors"); } else { // small copy buffer, use the trans->eof_node is sufficient diff --git a/components/esp_hw_support/dma/async_memcpy_gdma.c b/components/esp_hw_support/dma/async_memcpy_gdma.c index 8b8fa19930..7282f946fa 100644 --- a/components/esp_hw_support/dma/async_memcpy_gdma.c +++ b/components/esp_hw_support/dma/async_memcpy_gdma.c @@ -69,8 +69,10 @@ typedef struct async_memcpy_transaction_t { typedef struct { async_memcpy_context_t parent; // Parent IO interface size_t descriptor_align; // DMA descriptor alignment - size_t sram_trans_align; // DMA buffer alignment (both in size and address) for SRAM memory - size_t psram_trans_align; // DMA buffer alignment (both in size and address) for PSRAM memory + size_t rx_int_mem_alignment; // DMA buffer alignment (both in size and address) for internal RX memory + size_t rx_ext_mem_alignment; // DMA buffer alignment (both in size and address) for external RX memory + size_t tx_int_mem_alignment; // DMA buffer alignment (both in size and address) for internal TX 
memory + size_t tx_ext_mem_alignment; // DMA buffer alignment (both in size and address) for external TX memory size_t max_single_dma_buffer; // max DMA buffer size by a single descriptor int gdma_bus_id; // GDMA bus id (AHB, AXI, etc.) gdma_channel_handle_t tx_channel; // GDMA TX channel handle @@ -146,12 +148,12 @@ static esp_err_t esp_async_memcpy_install_gdma_template(const async_memcpy_confi ESP_GOTO_ON_ERROR(gdma_connect(mcp_gdma->rx_channel, m2m_trigger), err, TAG, "GDMA rx connect failed"); ESP_GOTO_ON_ERROR(gdma_connect(mcp_gdma->tx_channel, m2m_trigger), err, TAG, "GDMA tx connect failed"); - gdma_transfer_ability_t transfer_ability = { - .sram_trans_align = config->sram_trans_align, - .psram_trans_align = config->psram_trans_align, + gdma_transfer_config_t transfer_cfg = { + .max_data_burst_size = config->dma_burst_size ? config->dma_burst_size : 16, + .access_ext_mem = true, // allow to do memory copy from/to external memory }; - ESP_GOTO_ON_ERROR(gdma_set_transfer_ability(mcp_gdma->tx_channel, &transfer_ability), err, TAG, "set tx trans ability failed"); - ESP_GOTO_ON_ERROR(gdma_set_transfer_ability(mcp_gdma->rx_channel, &transfer_ability), err, TAG, "set rx trans ability failed"); + ESP_GOTO_ON_ERROR(gdma_config_transfer(mcp_gdma->tx_channel, &transfer_cfg), err, TAG, "config transfer for tx channel failed"); + ESP_GOTO_ON_ERROR(gdma_config_transfer(mcp_gdma->rx_channel, &transfer_cfg), err, TAG, "config transfer for rx channel failed"); // register rx eof callback gdma_rx_event_callbacks_t cbs = { @@ -172,15 +174,13 @@ static esp_err_t esp_async_memcpy_install_gdma_template(const async_memcpy_confi atomic_init(&mcp_gdma->fsm, MCP_FSM_IDLE); mcp_gdma->gdma_bus_id = gdma_bus_id; - uint32_t psram_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_EXT_MEM, CACHE_TYPE_DATA); - uint32_t sram_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA); - // if the psram_trans_align is configured to zero, we should 
fall back to use the data cache line size - size_t psram_trans_align = MAX(psram_cache_line_size, config->psram_trans_align); - size_t sram_trans_align = MAX(sram_cache_line_size, config->sram_trans_align); - size_t trans_align = MAX(sram_trans_align, psram_trans_align); - mcp_gdma->max_single_dma_buffer = ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align); - mcp_gdma->psram_trans_align = psram_trans_align; - mcp_gdma->sram_trans_align = sram_trans_align; + // get the buffer alignment required by the GDMA channel + gdma_get_alignment_constraints(mcp_gdma->rx_channel, &mcp_gdma->rx_int_mem_alignment, &mcp_gdma->rx_ext_mem_alignment); + gdma_get_alignment_constraints(mcp_gdma->tx_channel, &mcp_gdma->tx_int_mem_alignment, &mcp_gdma->tx_ext_mem_alignment); + + size_t buf_align = MAX(MAX(mcp_gdma->rx_int_mem_alignment, mcp_gdma->rx_ext_mem_alignment), + MAX(mcp_gdma->tx_int_mem_alignment, mcp_gdma->tx_ext_mem_alignment)); + mcp_gdma->max_single_dma_buffer = ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, buf_align); mcp_gdma->parent.del = mcp_gdma_del; mcp_gdma->parent.memcpy = mcp_gdma_memcpy; #if SOC_GDMA_SUPPORT_ETM @@ -335,29 +335,21 @@ static async_memcpy_transaction_t *try_pop_trans_from_idle_queue(async_memcpy_gd static bool check_buffer_alignment(async_memcpy_gdma_context_t *mcp_gdma, void *src, void *dst, size_t n) { bool valid = true; - uint32_t psram_align_mask = 0; - uint32_t sram_align_mask = 0; - if (mcp_gdma->psram_trans_align) { - psram_align_mask = mcp_gdma->psram_trans_align - 1; - } - if (mcp_gdma->sram_trans_align) { - sram_align_mask = mcp_gdma->sram_trans_align - 1; - } if (esp_ptr_external_ram(dst)) { - valid = valid && (((uint32_t)dst & psram_align_mask) == 0); - valid = valid && ((n & psram_align_mask) == 0); + valid = valid && (((uint32_t)dst & (mcp_gdma->rx_ext_mem_alignment - 1)) == 0); + valid = valid && ((n & (mcp_gdma->rx_ext_mem_alignment - 1)) == 0); } else { - valid = valid && (((uint32_t)dst & sram_align_mask) == 0); - valid = valid && 
((n & sram_align_mask) == 0); + valid = valid && (((uint32_t)dst & (mcp_gdma->rx_int_mem_alignment - 1)) == 0); + valid = valid && ((n & (mcp_gdma->rx_int_mem_alignment - 1)) == 0); } if (esp_ptr_external_ram(src)) { - valid = valid && (((uint32_t)src & psram_align_mask) == 0); - valid = valid && ((n & psram_align_mask) == 0); + valid = valid && (((uint32_t)src & (mcp_gdma->tx_ext_mem_alignment - 1)) == 0); + valid = valid && ((n & (mcp_gdma->tx_ext_mem_alignment - 1)) == 0); } else { - valid = valid && (((uint32_t)src & sram_align_mask) == 0); - valid = valid && ((n & sram_align_mask) == 0); + valid = valid && (((uint32_t)src & (mcp_gdma->tx_int_mem_alignment - 1)) == 0); + valid = valid && ((n & (mcp_gdma->tx_int_mem_alignment - 1)) == 0); } return valid; diff --git a/components/esp_hw_support/include/esp_async_memcpy.h b/components/esp_hw_support/include/esp_async_memcpy.h index 033bb9ed91..1b91b84093 100644 --- a/components/esp_hw_support/include/esp_async_memcpy.h +++ b/components/esp_hw_support/include/esp_async_memcpy.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -51,8 +51,11 @@ typedef bool (*async_memcpy_isr_cb_t)(async_memcpy_handle_t mcp_hdl, async_memcp */ typedef struct { uint32_t backlog; /*!< Maximum number of transactions that can be prepared in the background */ - size_t sram_trans_align; /*!< DMA transfer alignment (both in size and address) for SRAM memory */ - size_t psram_trans_align; /*!< DMA transfer alignment (both in size and address) for PSRAM memory */ + size_t sram_trans_align __attribute__((deprecated)); /*!< DMA transfer alignment (both in size and address) for SRAM memory */ + union { + size_t psram_trans_align; /*!< DMA transfer alignment (both in size and address) for PSRAM memory */ + size_t dma_burst_size; /*!< DMA transfer burst size, in bytes */ + }; uint32_t 
flags; /*!< Extra flags to control async memcpy feature */ } async_memcpy_config_t; @@ -62,8 +65,7 @@ typedef struct { #define ASYNC_MEMCPY_DEFAULT_CONFIG() \ { \ .backlog = 8, \ - .sram_trans_align = 0, \ - .psram_trans_align = 0, \ + .dma_burst_size = 16, \ .flags = 0, \ } diff --git a/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c b/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c index 81939889d0..79423a3205 100644 --- a/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c +++ b/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c @@ -302,8 +302,7 @@ static void memcpy_performance_test(uint32_t buffer_size) async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG(); config.backlog = (buffer_size / DMA_DESCRIPTOR_BUFFER_MAX_SIZE + 1) * TEST_ASYNC_MEMCPY_BENCH_COUNTS; - config.sram_trans_align = 4; // at least 4 bytes aligned for SRAM transfer - config.psram_trans_align = 64; // at least 64 bytes aligned for PSRAM transfer + config.dma_burst_size = 64; // set a big burst size for performance async_memcpy_handle_t driver = NULL; int64_t elapse_us = 0; float throughput = 0.0; @@ -311,7 +310,7 @@ static void memcpy_performance_test(uint32_t buffer_size) // 1. 
SRAM->SRAM memcpy_testbench_context_t test_context = { - .align = config.psram_trans_align, + .align = config.dma_burst_size, .buffer_size = buffer_size, .src_in_psram = false, .dst_in_psram = false, diff --git a/docs/en/api-reference/system/async_memcpy.rst b/docs/en/api-reference/system/async_memcpy.rst index d2ff7f2030..000e86d4a2 100644 --- a/docs/en/api-reference/system/async_memcpy.rst +++ b/docs/en/api-reference/system/async_memcpy.rst @@ -36,8 +36,7 @@ There are several ways to install the async memcpy driver, depending on the unde Driver configuration is described in :cpp:type:`async_memcpy_config_t`: * :cpp:member:`backlog`: This is used to configure the maximum number of memory copy transactions that can be queued up before the first one is completed. If this field is set to zero, then the default value 4 will be applied. -* :cpp:member:`sram_trans_align`: Declare SRAM alignment for both data address and copy size, set to zero if the data has no restriction in alignment. If set to a quadruple value (i.e., 4X), the driver will enable the burst mode internally, which is helpful for some performance related application. -* :cpp:member:`psram_trans_align`: Declare PSRAM alignment for both data address and copy size. User has to give it a valid value (only 16, 32, 64 are supported) if the destination of memcpy is located in PSRAM. The default alignment (i.e., 16) will be applied if it is set to zero. Internally, the driver configures the size of block used by DMA to access PSRAM, according to the alignment. +* :cpp:member:`dma_burst_size`: Set the size, in bytes, of a single DMA burst transfer. * :cpp:member:`flags`: This is used to enable some special driver features. ..
code-block:: c diff --git a/docs/zh_CN/api-reference/system/async_memcpy.rst b/docs/zh_CN/api-reference/system/async_memcpy.rst index fdd40d6bef..af3d772b85 100644 --- a/docs/zh_CN/api-reference/system/async_memcpy.rst +++ b/docs/zh_CN/api-reference/system/async_memcpy.rst @@ -36,8 +36,7 @@ DMA 允许多个内存复制请求在首个请求完成之前排队,即允许 在 :cpp:type:`async_memcpy_config_t` 中设置驱动配置: * :cpp:member:`backlog`:此项用于配置首个请求完成前可以排队的最大内存复制事务数量。如果将此字段设置为零,会应用默认值 4。 -* :cpp:member:`sram_trans_align`:声明 SRAM 中数据地址和复制大小的对齐方式,如果数据没有对齐限制,则设置为零。如果设置为四的倍数值(即 4X),驱动程序将内部启用突发模式,这有利于某些和性能相关的应用程序。 -* :cpp:member:`psram_trans_align`:声明 PSRAM 中数据地址和复制大小的对齐方式。如果 memcpy 的目标地址位于 PSRAM 中,用户必须给出一个有效值(只支持 16、32、64)。如果设置为零,会默认采用 16 位对齐。在内部,驱动程序会根据对齐方式来配置 DMA 访问 PSRAM 时所用的块大小。 +* :cpp:member:`dma_burst_size`:设置单次 DMA 传输中突发数据量的大小。 * :cpp:member:`flags`:此项可以启用一些特殊的驱动功能。 .. code-block:: c