diff --git a/components/esp_driver_rmt/src/rmt_private.h b/components/esp_driver_rmt/src/rmt_private.h
index 647dffd877..aa27a118d3 100644
--- a/components/esp_driver_rmt/src/rmt_private.h
+++ b/components/esp_driver_rmt/src/rmt_private.h
@@ -67,6 +67,9 @@ typedef dma_descriptor_align4_t rmt_dma_descriptor_t;
 #define RMT_GET_NON_CACHE_ADDR(addr) (addr)
 #endif
 
+#define ALIGN_UP(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
+#define ALIGN_DOWN(num, align) ((num) & ~((align) - 1))
+
 typedef struct {
     struct {
         rmt_symbol_word_t symbols[SOC_RMT_MEM_WORDS_PER_CHANNEL];
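The two macros hoisted into rmt_private.h are the usual power-of-two rounding helpers. A minimal standalone sketch of their arithmetic (the 32-byte cache line below is illustrative, not a value taken from the driver):

    #include <assert.h>

    #define ALIGN_UP(num, align)   (((num) + ((align) - 1)) & ~((align) - 1))
    #define ALIGN_DOWN(num, align) ((num) & ~((align) - 1))

    int main(void)
    {
        // with a 32-byte cache line, a 100-byte request is padded to whole lines
        assert(ALIGN_UP(100, 32) == 128);
        assert(ALIGN_DOWN(100, 32) == 96);
        assert(ALIGN_UP(128, 32) == 128); // already-aligned sizes are unchanged
        // the bit trick is only valid when `align` is a power of two
        return 0;
    }

Rounding the allocation size up this way matters because esp_cache_msync operates on whole cache lines: the buffer must own every line it spans, otherwise a sync could touch memory belonging to a neighboring allocation.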
diff --git a/components/esp_driver_rmt/src/rmt_rx.c b/components/esp_driver_rmt/src/rmt_rx.c
index 1e8fec97dc..f377381f97 100644
--- a/components/esp_driver_rmt/src/rmt_rx.c
+++ b/components/esp_driver_rmt/src/rmt_rx.c
@@ -28,9 +28,6 @@
 #include "driver/rmt_rx.h"
 #include "rmt_private.h"
 
-#define ALIGN_UP(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
-#define ALIGN_DOWN(num, align) ((num) & ~((align) - 1))
-
 static const char *TAG = "rmt";
 
 static esp_err_t rmt_del_rx_channel(rmt_channel_handle_t channel);
@@ -207,10 +204,20 @@ esp_err_t rmt_new_rx_channel(const rmt_rx_channel_config_t *config, rmt_channel_
         uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
         // the alignment should meet both the DMA and cache requirement
         size_t alignment = MAX(data_cache_line_size, RMT_DMA_DESC_ALIGN);
-        rx_channel->dma_nodes = heap_caps_aligned_calloc(alignment, num_dma_nodes, sizeof(rmt_dma_descriptor_t), mem_caps);
-        ESP_GOTO_ON_FALSE(rx_channel->dma_nodes, ESP_ERR_NO_MEM, err, TAG, "no mem for rx channel DMA nodes");
+        size_t dma_nodes_size = ALIGN_UP(num_dma_nodes * sizeof(rmt_dma_descriptor_t), alignment);
+        rmt_dma_descriptor_t *dma_nodes = heap_caps_aligned_calloc(alignment, 1, dma_nodes_size, mem_caps);
+        ESP_GOTO_ON_FALSE(dma_nodes, ESP_ERR_NO_MEM, err, TAG, "no mem for rx channel DMA nodes");
+        rx_channel->dma_nodes = dma_nodes;
+        // do memory sync only when the data cache exists
+        if (data_cache_line_size) {
+            // write back and then invalidate the cached dma_nodes; we will bypass the cache (via the non-cacheable address) when accessing the dma_nodes,
+            // so even if a cache auto write-back happens, there is no risk of the dma_nodes being overwritten
+            ESP_GOTO_ON_ERROR(esp_cache_msync(dma_nodes, dma_nodes_size,
+                                              ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_INVALIDATE),
+                              err, TAG, "cache sync failed");
+        }
         // we will use the non-cached address to manipulate the DMA descriptor, for simplicity
-        rx_channel->dma_nodes_nc = (rmt_dma_descriptor_t *)RMT_GET_NON_CACHE_ADDR(rx_channel->dma_nodes);
+        rx_channel->dma_nodes_nc = (rmt_dma_descriptor_t *)RMT_GET_NON_CACHE_ADDR(dma_nodes);
     }
     rx_channel->num_dma_nodes = num_dma_nodes;
     // register the channel to group
@@ -351,11 +358,11 @@ esp_err_t rmt_receive(rmt_channel_handle_t channel, void *buffer, size_t buffer_
     rmt_rx_channel_t *rx_chan = __containerof(channel, rmt_rx_channel_t, base);
     size_t per_dma_block_size = 0;
     size_t last_dma_block_size = 0;
+    uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
     if (channel->dma_chan) {
         // Currently we assume the user buffer is allocated from internal RAM, PSRAM is not supported yet.
         ESP_RETURN_ON_FALSE_ISR(esp_ptr_internal(buffer), ESP_ERR_INVALID_ARG, TAG,
                                 "user buffer not allocated from internal RAM");
-        uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
         // DMA doesn't have alignment requirement for SRAM buffer if the burst mode is not enabled,
         // but we need to make sure the buffer is aligned to cache line size
         uint32_t align_mask = data_cache_line_size ? (data_cache_line_size - 1) : 0;
@@ -395,6 +402,10 @@ esp_err_t rmt_receive(rmt_channel_handle_t channel, void *buffer, size_t buffer_
 
     if (channel->dma_chan) {
 #if SOC_RMT_SUPPORT_DMA
+        // invalidate the user buffer, in case a cache auto write-back happens and corrupts the data being written by the DMA
+        if (data_cache_line_size) {
+            ESP_RETURN_ON_ERROR_ISR(esp_cache_msync(buffer, buffer_size, ESP_CACHE_MSYNC_FLAG_DIR_M2C), TAG, "cache sync failed");
+        }
         rmt_rx_mount_dma_buffer(rx_chan, buffer, buffer_size, per_dma_block_size, last_dma_block_size);
         gdma_reset(channel->dma_chan);
         gdma_start(channel->dma_chan, (intptr_t)rx_chan->dma_nodes); // note, we must use the cached descriptor address to start the DMA
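The allocate-pad-sync sequence above for the RX descriptors is repeated below for the TX descriptors and the TX DMA buffer. A condensed sketch of the shared pattern, built only from calls the patch itself uses (the helper name and its factoring are illustrative, not something this patch introduces):

    #include <stdint.h>
    #include <sys/param.h> // MAX
    #include "esp_cache.h"
    #include "esp_heap_caps.h"
    #include "hal/cache_hal.h"
    #include "hal/cache_ll.h"

    #define ALIGN_UP(num, align) (((num) + ((align) - 1)) & ~((align) - 1))

    // allocate zero-initialized DMA memory whose size and alignment both cover
    // whole cache lines, then write back and invalidate the region so that no
    // stale dirty line can later be evicted on top of it
    static void *dma_calloc_cache_safe(size_t req_size, size_t min_align, size_t *out_size)
    {
        uint32_t line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
        size_t alignment = MAX(line_size, min_align);
        size_t size = ALIGN_UP(req_size, alignment);
        void *buf = heap_caps_aligned_calloc(alignment, 1, size, MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA);
        if (!buf) {
            return NULL;
        }
        // a zero line size means the target has no data cache, so no sync is needed
        if (line_size && esp_cache_msync(buf, size,
                                         ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_INVALIDATE) != ESP_OK) {
            heap_caps_free(buf);
            return NULL;
        }
        *out_size = size;
        return buf;
    }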
diff --git a/components/esp_driver_rmt/src/rmt_tx.c b/components/esp_driver_rmt/src/rmt_tx.c
index 81e019c3d3..05acb61cd8 100644
--- a/components/esp_driver_rmt/src/rmt_tx.c
+++ b/components/esp_driver_rmt/src/rmt_tx.c
@@ -23,6 +23,7 @@
 #include "hal/gpio_hal.h"
 #include "hal/cache_hal.h"
 #include "hal/cache_ll.h"
+#include "esp_cache.h"
 #include "driver/gpio.h"
 #include "driver/rmt_tx.h"
 #include "rmt_private.h"
@@ -54,10 +55,20 @@ static esp_err_t rmt_tx_init_dma_link(rmt_tx_channel_t *tx_channel, const rmt_tx
     uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
     // the alignment should meet both the DMA and cache requirement
     size_t alignment = MAX(data_cache_line_size, sizeof(rmt_symbol_word_t));
-    rmt_symbol_word_t *dma_mem_base = heap_caps_aligned_calloc(alignment, config->mem_block_symbols, sizeof(rmt_symbol_word_t),
+    size_t dma_mem_base_size = ALIGN_UP(config->mem_block_symbols * sizeof(rmt_symbol_word_t), alignment);
+    rmt_symbol_word_t *dma_mem_base = heap_caps_aligned_calloc(alignment, 1, dma_mem_base_size,
                                                                RMT_MEM_ALLOC_CAPS | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
     ESP_RETURN_ON_FALSE(dma_mem_base, ESP_ERR_NO_MEM, TAG, "no mem for tx DMA buffer");
     tx_channel->dma_mem_base = dma_mem_base;
+    // do memory sync only when the data cache exists
+    if (data_cache_line_size) {
+        // write back and then invalidate the cache; we will bypass the cache (via the non-cacheable address) when accessing the dma_mem_base,
+        // so even if a cache auto write-back happens, there is no risk of the dma_mem_base being overwritten
+        ESP_RETURN_ON_ERROR(esp_cache_msync(dma_mem_base, dma_mem_base_size,
+                                            ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_INVALIDATE),
+                            TAG, "cache sync failed");
+    }
+    // we use the non-cached address to manipulate this DMA buffer
     tx_channel->dma_mem_base_nc = (rmt_symbol_word_t *)RMT_GET_NON_CACHE_ADDR(dma_mem_base);
     for (int i = 0; i < RMT_DMA_NODES_PING_PONG; i++) {
         // each descriptor shares half of the DMA buffer
@@ -258,10 +269,18 @@ esp_err_t rmt_new_tx_channel(const rmt_tx_channel_config_t *config, rmt_channel_
         uint32_t data_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
         // the alignment should meet both the DMA and cache requirement
         size_t alignment = MAX(data_cache_line_size, RMT_DMA_DESC_ALIGN);
-        tx_channel->dma_nodes = heap_caps_aligned_calloc(alignment, RMT_DMA_NODES_PING_PONG, sizeof(rmt_dma_descriptor_t), mem_caps);
-        ESP_GOTO_ON_FALSE(tx_channel->dma_nodes, ESP_ERR_NO_MEM, err, TAG, "no mem for tx DMA nodes");
+        size_t dma_nodes_mem_size = ALIGN_UP(RMT_DMA_NODES_PING_PONG * sizeof(rmt_dma_descriptor_t), alignment);
+        rmt_dma_descriptor_t *dma_nodes = heap_caps_aligned_calloc(alignment, 1, dma_nodes_mem_size, mem_caps);
+        ESP_GOTO_ON_FALSE(dma_nodes, ESP_ERR_NO_MEM, err, TAG, "no mem for tx DMA nodes");
+        tx_channel->dma_nodes = dma_nodes;
+        // write back and then invalidate the cached dma_nodes; we will bypass the cache (via the non-cacheable address) when accessing the dma_nodes
+        if (data_cache_line_size) {
+            ESP_GOTO_ON_ERROR(esp_cache_msync(dma_nodes, dma_nodes_mem_size,
+                                              ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_INVALIDATE),
+                              err, TAG, "cache sync failed");
+        }
         // we will use the non-cached address to manipulate the DMA descriptor, for simplicity
-        tx_channel->dma_nodes_nc = (rmt_dma_descriptor_t *)RMT_GET_NON_CACHE_ADDR(tx_channel->dma_nodes);
+        tx_channel->dma_nodes_nc = (rmt_dma_descriptor_t *)RMT_GET_NON_CACHE_ADDR(dma_nodes);
     }
     // create transaction queues
     ESP_GOTO_ON_ERROR(rmt_tx_create_trans_queue(tx_channel, config), err, TAG, "install trans queues failed");
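One asymmetry worth noting: for the descriptors and the TX buffer the sync direction is C2M (write back) plus invalidate, because the CPU has just zero-filled that memory through the cache; on the RX path in rmt_receive the sync is M2C (invalidate), and it must run before the DMA is started, since the hazard is a dirty cache line for the user buffer being auto-written back mid-transfer, on top of bytes the DMA has already placed in RAM. A simplified sketch of that ordering (error handling elided; start_dma_rx is a hypothetical stand-in for the gdma_reset/gdma_start pair):

    #include <stddef.h>
    #include <stdint.h>
    #include "esp_cache.h"
    #include "hal/cache_hal.h"
    #include "hal/cache_ll.h"

    extern void start_dma_rx(void *buf, size_t len); // stand-in for the gdma_* calls

    void receive_into(void *buf, size_t len)
    {
        uint32_t line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
        // 1. discard any cached (possibly dirty) copy of the buffer *before* the
        //    DMA starts writing; an eviction mid-transfer would overwrite DMA data
        if (line_size) {
            esp_cache_msync(buf, len, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
        }
        // 2. only then hand the buffer to the DMA
        start_dma_rx(buf, len);
    }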