From 3b7da7eae52fbb85569f5261926ce73476e2ce4b Mon Sep 17 00:00:00 2001
From: morris
Date: Tue, 11 Jul 2023 16:32:54 +0800
Subject: [PATCH 1/2] feat(gdma): gdma descriptor alignment

---
 .../hal/esp32p4/include/hal/axi_dma_ll.h |  2 --
 components/hal/include/hal/dma_types.h   | 34 ++++++++++++++-----
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/components/hal/esp32p4/include/hal/axi_dma_ll.h b/components/hal/esp32p4/include/hal/axi_dma_ll.h
index 2219725c0d..f044cc6707 100644
--- a/components/hal/esp32p4/include/hal/axi_dma_ll.h
+++ b/components/hal/esp32p4/include/hal/axi_dma_ll.h
@@ -108,7 +108,6 @@ static inline void axi_dma_ll_rx_enable_owner_check(axi_dma_dev_t *dev, uint32_t
  */
 static inline void axi_dma_ll_rx_enable_data_burst(axi_dma_dev_t *dev, uint32_t channel, bool enable)
 {
-    // TODO: IDF-6504
 }
 
 /**
@@ -305,7 +304,6 @@ static inline void axi_dma_ll_tx_enable_owner_check(axi_dma_dev_t *dev, uint32_t
  */
 static inline void axi_dma_ll_tx_enable_data_burst(axi_dma_dev_t *dev, uint32_t channel, bool enable)
 {
-    // TODO: IDF-6504
 }
 
 /**
diff --git a/components/hal/include/hal/dma_types.h b/components/hal/include/hal/dma_types.h
index dead785c2c..a0cae11ea3 100644
--- a/components/hal/include/hal/dma_types.h
+++ b/components/hal/include/hal/dma_types.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -14,10 +14,10 @@ extern "C" {
 #endif
 
 /**
- * @brief Type of DMA descriptor
- *
+ * @brief Type of DMA descriptor that is aligned to 4 bytes
  */
-typedef struct dma_descriptor_s {
+typedef struct dma_descriptor_s dma_descriptor_t;
+struct dma_descriptor_s {
     struct {
         uint32_t size : 12;         /*!< Buffer size */
         uint32_t length : 12;       /*!< Number of valid bytes in the buffer */
@@ -28,17 +28,35 @@
         uint32_t owner : 1;         /*!< Who is allowed to access the buffer that this descriptor points to */
     } dw0;                          /*!< Descriptor Word 0 */
     void *buffer;                   /*!< Pointer to the buffer */
-    struct dma_descriptor_s *next;  /*!< Pointer to the next descriptor (set to NULL if the descriptor is the last one, e.g. suc_eof=1) */
-} dma_descriptor_t;
+    dma_descriptor_t *next;         /*!< Pointer to the next descriptor (set to NULL if the descriptor is the last one, e.g. suc_eof=1) */
+} __attribute__((aligned(4)));
+typedef dma_descriptor_t dma_descriptor_align4_t;
+ESP_STATIC_ASSERT(sizeof(dma_descriptor_align4_t) == 12, "dma_descriptor_align4_t should occupy 12 bytes in memory");
 
-ESP_STATIC_ASSERT(sizeof(dma_descriptor_t) == 12, "dma_descriptor_t should occupy 12 bytes in memory");
+/**
+ * @brief Type of DMA descriptor that is aligned to 8 bytes
+ */
+typedef struct dma_descriptor_align8_s dma_descriptor_align8_t;
+struct dma_descriptor_align8_s {
+    struct {
+        uint32_t size : 12;         /*!< Buffer size */
+        uint32_t length : 12;       /*!< Number of valid bytes in the buffer */
+        uint32_t reversed24_27 : 4; /*!< Reserved */
+        uint32_t err_eof : 1;       /*!< Whether the received buffer contains error */
+        uint32_t reserved29 : 1;    /*!< Reserved */
+        uint32_t suc_eof : 1;       /*!< Whether the descriptor is the last one in the link */
+        uint32_t owner : 1;         /*!< Who is allowed to access the buffer that this descriptor points to */
+    } dw0;                          /*!< Descriptor Word 0 */
+    void *buffer;                   /*!< Pointer to the buffer */
+    dma_descriptor_align8_t *next;  /*!< Pointer to the next descriptor (set to NULL if the descriptor is the last one, e.g. suc_eof=1) */
+} __attribute__((aligned(8)));
+ESP_STATIC_ASSERT(sizeof(dma_descriptor_align8_t) == 16, "dma_descriptor_align8_t should occupy 16 bytes in memory");
 
 #define DMA_DESCRIPTOR_BUFFER_OWNER_CPU   (0)    /*!< DMA buffer is allowed to be accessed by CPU */
 #define DMA_DESCRIPTOR_BUFFER_OWNER_DMA   (1)    /*!< DMA buffer is allowed to be accessed by DMA engine */
 #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE    (4095) /*!< Maximum size of the buffer that can be attached to descriptor */
 #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED  (4095-3) /*!< Maximum size of the buffer that can be attached to descriptor, and aligned to 4B */
 
-
 #ifdef __cplusplus
 }
 #endif

From 6d46cf739ce0283d7fe12bbef05d4968848831b0 Mon Sep 17 00:00:00 2001
From: morris
Date: Mon, 24 Jul 2023 15:16:40 +0800
Subject: [PATCH 2/2] feat(gdma): test non-cacheable DMA descriptor

To avoid having different DMA descriptors reside in the same cache line,
we want the CPU to access the DMA descriptors in a non-cacheable way.
---
 .../test_apps/dma/main/test_gdma.c            | 69 ++++++++++++-------
 components/hal/esp32p4/include/hal/cache_ll.h |  7 +-
 2 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/components/esp_hw_support/test_apps/dma/main/test_gdma.c b/components/esp_hw_support/test_apps/dma/main/test_gdma.c
index 3e27fec750..37c79af66b 100644
--- a/components/esp_hw_support/test_apps/dma/main/test_gdma.c
+++ b/components/esp_hw_support/test_apps/dma/main/test_gdma.c
@@ -14,6 +14,7 @@
 #include "hal/dma_types.h"
 #include "soc/soc_caps.h"
 #include "hal/gdma_ll.h"
+#include "hal/cache_ll.h"
 #include "rom/cache.h"
 
 TEST_CASE("GDMA channel allocation", "[GDMA]")
@@ -179,51 +180,73 @@ static void test_gdma_m2m_mode(gdma_channel_handle_t tx_chan, gdma_channel_handl
     memset(src_buf, 0, 256);
     memset(dst_buf, 0, 256);
 
-    dma_descriptor_t *tx_desc = (dma_descriptor_t *) src_buf;
-    dma_descriptor_t *rx_desc = (dma_descriptor_t *) dst_buf;
+    dma_descriptor_align8_t *tx_descs = (dma_descriptor_align8_t *) src_buf;
+    dma_descriptor_align8_t *rx_descs = (dma_descriptor_align8_t *) dst_buf;
     uint8_t *src_data = src_buf + 64;
     uint8_t *dst_data = dst_buf + 64;
 
+    // prepare the source data
     for (int i = 0; i < 100; i++) {
         src_data[i] = i;
     }
 
-    tx_desc->buffer = src_data;
-    tx_desc->dw0.size = 100;
-    tx_desc->dw0.length = 100;
-    tx_desc->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
-    tx_desc->dw0.suc_eof = 1;
-    tx_desc->next = NULL;
+#if CONFIG_IDF_TARGET_ESP32P4
+    // Both the CPU and the DMA can write to the DMA descriptors, so if there is a cache, multiple descriptors may reside in the same cache line,
+    // causing data inconsistency. To avoid this, we want to access the descriptor memory without going through the cache.
+
+    dma_descriptor_align8_t *tx_descs_noncache = (dma_descriptor_align8_t *)(CACHE_LL_L2MEM_NON_CACHE_ADDR(tx_descs));
+    dma_descriptor_align8_t *rx_descs_noncache = (dma_descriptor_align8_t *)(CACHE_LL_L2MEM_NON_CACHE_ADDR(rx_descs));
 
-    rx_desc->buffer = dst_data;
-    rx_desc->dw0.size = 100;
-    rx_desc->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
-    rx_desc->next = NULL;
+    tx_descs_noncache[0].buffer = src_data;
+    tx_descs_noncache[0].dw0.size = 50;
+    tx_descs_noncache[0].dw0.length = 50;
+    tx_descs_noncache[0].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
+    tx_descs_noncache[0].dw0.suc_eof = 0;
+    tx_descs_noncache[0].next = &tx_descs[1]; // Note: the DMA doesn't recognize non-cacheable addresses, so the link must hold the cached address
+
+    tx_descs_noncache[1].buffer = src_data + 50;
+    tx_descs_noncache[1].dw0.size = 50;
+    tx_descs_noncache[1].dw0.length = 50;
+    tx_descs_noncache[1].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
+    tx_descs_noncache[1].dw0.suc_eof = 1;
+    tx_descs_noncache[1].next = NULL;
+
+    rx_descs_noncache->buffer = dst_data;
+    rx_descs_noncache->dw0.size = 100;
+    rx_descs_noncache->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
+    rx_descs_noncache->dw0.suc_eof = 1;
+    rx_descs_noncache->next = NULL;
+#else
+    tx_descs->buffer = src_data;
+    tx_descs->dw0.size = 100;
+    tx_descs->dw0.length = 100;
+    tx_descs->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
+    tx_descs->dw0.suc_eof = 1;
+    tx_descs->next = NULL;
+
+    rx_descs->buffer = dst_data;
+    rx_descs->dw0.size = 100;
+    rx_descs->dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
+    rx_descs->next = NULL;
+#endif
 
 #if CONFIG_IDF_TARGET_ESP32P4
-    // descriptors are in the cache, DMA engine may not see the changes, so do a write-back
-    Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)tx_desc, sizeof(tx_desc));
-    Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)rx_desc, sizeof(rx_desc));
-    // do write-back for the source data
+    // do a write-back for the source data because it's in the cache
     Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)src_data, 100);
 #endif
 
-    TEST_ESP_OK(gdma_start(rx_chan, (intptr_t)rx_desc));
-    TEST_ESP_OK(gdma_start(tx_chan, (intptr_t)tx_desc));
+    TEST_ESP_OK(gdma_start(rx_chan, (intptr_t)rx_descs));
+    TEST_ESP_OK(gdma_start(tx_chan, (intptr_t)tx_descs));
 
     xSemaphoreTake(done_sem, portMAX_DELAY);
 
 #if CONFIG_IDF_TARGET_ESP32P4
-    // the destination data are not reflected to the cache, so do an invalidate to ask the cache load new data
+    // the destination data is not yet reflected in the cache, so do an invalidate so that the cache loads the new data
     Cache_Invalidate_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)dst_data, 100);
-    // the DMA descriptors are updated by the DMA as well, so do an invalidate
-    Cache_Invalidate_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)tx_desc, sizeof(tx_desc));
-    Cache_Invalidate_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)rx_desc, sizeof(rx_desc));
 #endif
 
     // check the DMA descriptor write-back feature
-    TEST_ASSERT_EQUAL(DMA_DESCRIPTOR_BUFFER_OWNER_CPU, tx_desc->dw0.owner);
-    TEST_ASSERT_EQUAL(DMA_DESCRIPTOR_BUFFER_OWNER_CPU, rx_desc->dw0.owner);
+    TEST_ASSERT_EQUAL(DMA_DESCRIPTOR_BUFFER_OWNER_CPU, tx_descs[0].dw0.owner);
+    TEST_ASSERT_EQUAL(DMA_DESCRIPTOR_BUFFER_OWNER_CPU, rx_descs[0].dw0.owner);
 
     for (int i = 0; i < 100; i++) {
         TEST_ASSERT_EQUAL(i, dst_data[i]);
diff --git a/components/hal/esp32p4/include/hal/cache_ll.h b/components/hal/esp32p4/include/hal/cache_ll.h
index 497f75426e..7f3db05b02 100644
--- a/components/hal/esp32p4/include/hal/cache_ll.h
+++ b/components/hal/esp32p4/include/hal/cache_ll.h
@@ -17,6 +17,12 @@
 extern "C" {
 #endif
 
+/**
+ * @brief Given an L2MEM cached address, get the corresponding non-cacheable address
+ * @example 0x4FF0_0000 => 0x8FF0_0000
+ */
+#define CACHE_LL_L2MEM_NON_CACHE_ADDR(addr) ((intptr_t)(addr) + 0x40000000)
+
 #define CACHE_LL_ENABLE_DISABLE_STATE_SW        1   // There's no register indicating cache enable/disable state, so we need to track this state in software.
 
 #define CACHE_LL_DEFAULT_IBUS_MASK              CACHE_BUS_IBUS0
@@ -36,7 +42,6 @@
 // #define CACHE_LL_L1_ILG_EVENT_PRELOAD_OP_FAULT      (1<<1)
 // #define CACHE_LL_L1_ILG_EVENT_SYNC_OP_FAULT         (1<<0)
 
-
 /**
  * @brief Get the buses of a particular cache that are mapped to a virtual address range
  *
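
Taken together, the two patches suggest a usage pattern for ESP32-P4 targets: place the descriptors in DMA-capable internal memory, program them through the CACHE_LL_L2MEM_NON_CACHE_ADDR alias so that CPU writes bypass the cache, and hand the cached (original) address to the DMA engine. The sketch below is illustrative only; the helper name build_tx_chain, the heap_caps_aligned_calloc() allocation, and the extra write-back of the freshly zeroed descriptor area are assumptions layered on top of what the test above does, not part of the patches.

#include "esp_heap_caps.h"
#include "hal/dma_types.h"
#include "hal/cache_ll.h"
#include "rom/cache.h"

// Hypothetical helper: build a two-node TX descriptor chain in DMA-capable
// internal memory and return the cached address that the DMA link expects.
static dma_descriptor_align8_t *build_tx_chain(uint8_t *payload, size_t len)
{
    // 8-byte alignment matches dma_descriptor_align8_t
    dma_descriptor_align8_t *descs = heap_caps_aligned_calloc(
        8, 2, sizeof(dma_descriptor_align8_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA);
    if (!descs) {
        return NULL;
    }
    // calloc zeroed the memory through the cache; flush that before writing
    // through the non-cacheable alias, so a later eviction can't overwrite it
    Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)descs, 2 * sizeof(*descs));

    // program the descriptors through the non-cacheable alias
    dma_descriptor_align8_t *nc = (dma_descriptor_align8_t *)CACHE_LL_L2MEM_NON_CACHE_ADDR(descs);
    size_t half = len / 2;

    nc[0].buffer = payload;
    nc[0].dw0.size = half;
    nc[0].dw0.length = half;
    nc[0].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
    nc[0].dw0.suc_eof = 0;
    nc[0].next = &descs[1]; // the DMA follows cached addresses, not the alias

    nc[1].buffer = payload + half;
    nc[1].dw0.size = len - half;
    nc[1].dw0.length = len - half;
    nc[1].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
    nc[1].dw0.suc_eof = 1;
    nc[1].next = NULL;

    // the payload is still accessed through the cache, so write it back to memory
    Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE, (uint32_t)payload, len);

    return descs;
}

A caller would then pass the returned pointer to gdma_start(tx_chan, (intptr_t)descs), exactly as the test above does with its cached tx_descs pointer.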