From 555802887035d14ba0ccbb6817c4e0d9192c4c74 Mon Sep 17 00:00:00 2001 From: morris Date: Sat, 8 Feb 2025 14:09:17 +0800 Subject: [PATCH] bugfix(cache): cache invalidate operation should respect the cache line size not only the buffer address but also the buffer size should aligned to the cache line size --- .../src/bitscrambler_loopback.c | 42 +++++++------------ .../bitscrambler/main/test_bitscrambler.c | 2 +- components/esp_hw_support/dma/esp_dma_utils.c | 7 +++- components/esp_mm/esp_cache.c | 4 +- .../main/bitscrambler_example_main.c | 26 ++++++------ 5 files changed, 36 insertions(+), 45 deletions(-) diff --git a/components/esp_driver_bitscrambler/src/bitscrambler_loopback.c b/components/esp_driver_bitscrambler/src/bitscrambler_loopback.c index f24fa60a8b..68edb662a4 100644 --- a/components/esp_driver_bitscrambler/src/bitscrambler_loopback.c +++ b/components/esp_driver_bitscrambler/src/bitscrambler_loopback.c @@ -20,7 +20,6 @@ #include "esp_check.h" #include "esp_heap_caps.h" #include "esp_cache.h" -#include "esp_dma_utils.h" #include "esp_memory_utils.h" const static char *TAG = "bs_loop"; @@ -209,18 +208,21 @@ esp_err_t bitscrambler_loopback_run(bitscrambler_handle_t bs, void *buffer_in, s return ESP_ERR_INVALID_SIZE; } - //Casual check to see if the buffer is aligned to cache requirements. - esp_dma_mem_info_t dma_mem_info = { - .dma_alignment_bytes = 4 - }; - //Note: we know the size of the data, but not of the buffer that contains it, so we set length=0. - if (!esp_dma_is_buffer_alignment_satisfied(buffer_in, 0, dma_mem_info)) { - ESP_LOGE(TAG, "buffer_in not aligned to DMA requirements"); - return ESP_ERR_INVALID_ARG; + int int_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA); + int ext_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_EXT_MEM, CACHE_TYPE_DATA); + + bool need_cache_sync = esp_ptr_internal(buffer_in) ? (int_mem_cache_line_size > 0) : (ext_mem_cache_line_size > 0); + if (need_cache_sync) { + //Note: we add the ESP_CACHE_MSYNC_FLAG_UNALIGNED flag for now as otherwise esp_cache_msync will complain about + //the size not being aligned... we miss out on a check to see if the address is aligned this way. This needs to + //be improved, but potentially needs a fix in esp_cache_msync not to check the size. + ESP_RETURN_ON_ERROR(esp_cache_msync(buffer_in, length_bytes_in, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED), + TAG, "failed in cache sync for input buffer"); } - if (!esp_dma_is_buffer_alignment_satisfied(buffer_out, 0, dma_mem_info)) { - ESP_LOGE(TAG, "buffer_out not aligned to DMA requirements"); - return ESP_ERR_INVALID_ARG; + need_cache_sync = esp_ptr_internal(buffer_out) ? (int_mem_cache_line_size > 0) : (ext_mem_cache_line_size > 0); + if (need_cache_sync) { + ESP_RETURN_ON_ERROR(esp_cache_msync(buffer_out, length_bytes_out, ESP_CACHE_MSYNC_FLAG_DIR_M2C), + TAG, "failed in cache sync for output buffer"); } gdma_reset(bsl->rx_channel); @@ -247,17 +249,6 @@ esp_err_t bitscrambler_loopback_run(bitscrambler_handle_t bs, void *buffer_in, s }; gdma_link_mount_buffers(bsl->rx_link_list, 0, &out_buf_mount_config, 1, NULL); - int int_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA); - int ext_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_EXT_MEM, CACHE_TYPE_DATA); - - bool need_cache_sync = esp_ptr_internal(buffer_in) ? (int_mem_cache_line_size > 0) : (ext_mem_cache_line_size > 0); - if (need_cache_sync) { - //Note: we add the ESP_CACHE_MSYNC_FLAG_UNALIGNED flag for now as otherwise esp_cache_msync will complain about - //the size not being aligned... we miss out on a check to see if the address is aligned this way. This needs to - //be improved, but potentially needs a fix in esp_cache_msync not to check the size. - esp_cache_msync(buffer_in, length_bytes_in, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - } - gdma_start(bsl->rx_channel, gdma_link_get_head_addr(bsl->rx_link_list)); gdma_start(bsl->tx_channel, gdma_link_get_head_addr(bsl->tx_link_list)); bitscrambler_start(bs); @@ -272,11 +263,6 @@ esp_err_t bitscrambler_loopback_run(bitscrambler_handle_t bs, void *buffer_in, s ret = ESP_ERR_TIMEOUT; } - need_cache_sync = esp_ptr_internal(buffer_out) ? (int_mem_cache_line_size > 0) : (ext_mem_cache_line_size > 0); - if (need_cache_sync) { - esp_cache_msync(buffer_out, length_bytes_out, ESP_CACHE_MSYNC_FLAG_DIR_M2C); - } - if (bytes_written) { *bytes_written = gdma_link_count_buffer_size_till_eof(bsl->rx_link_list, 0); } diff --git a/components/esp_driver_bitscrambler/test_apps/bitscrambler/main/test_bitscrambler.c b/components/esp_driver_bitscrambler/test_apps/bitscrambler/main/test_bitscrambler.c index 45a6035920..4a3c1047f1 100644 --- a/components/esp_driver_bitscrambler/test_apps/bitscrambler/main/test_bitscrambler.c +++ b/components/esp_driver_bitscrambler/test_apps/bitscrambler/main/test_bitscrambler.c @@ -16,7 +16,7 @@ BITSCRAMBLER_PROGRAM(bitscrambler_program_timeout, "timeout"); TEST_CASE("Basic BitScrambler I/O", "[bs]") { - int len = 0x4010; + int len = 0x4000; uint8_t *data_in = heap_caps_aligned_calloc(8, 1, len, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); uint8_t *data_out = heap_caps_aligned_calloc(8, 1, len, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); TEST_ASSERT_NOT_NULL(data_in); diff --git a/components/esp_hw_support/dma/esp_dma_utils.c b/components/esp_hw_support/dma/esp_dma_utils.c index b46e40334b..ad97b84e7e 100644 --- a/components/esp_hw_support/dma/esp_dma_utils.c +++ b/components/esp_hw_support/dma/esp_dma_utils.c @@ -101,7 +101,12 @@ esp_err_t esp_dma_split_rx_buffer_to_cache_aligned(void *rx_buffer, size_t buffe // invalidate the aligned buffer if necessary for (int i = 0; i < 3; i++) { if (need_cache_sync[i]) { - esp_err_t res = esp_cache_msync(align_buf_array->aligned_buffer[i].aligned_buffer, align_buf_array->aligned_buffer[i].length, ESP_CACHE_MSYNC_FLAG_DIR_M2C); + size_t sync_size = align_buf_array->aligned_buffer[i].length; + if (sync_size < split_line_size) { + // If the buffer is smaller than the cache line size, we need to sync the whole buffer + sync_size = split_line_size; + } + esp_err_t res = esp_cache_msync(align_buf_array->aligned_buffer[i].aligned_buffer, sync_size, ESP_CACHE_MSYNC_FLAG_DIR_M2C); ESP_GOTO_ON_ERROR(res, err, TAG, "failed to do cache sync"); } } diff --git a/components/esp_mm/esp_cache.c b/components/esp_mm/esp_cache.c index 3c41a17ff3..ee01552045 100644 --- a/components/esp_mm/esp_cache.c +++ b/components/esp_mm/esp_cache.c @@ -108,8 +108,8 @@ esp_err_t esp_cache_msync(void *addr, size_t size, int flags) } uint32_t cache_line_size = cache_hal_get_cache_line_size(cache_level, cache_type); if ((flags & ESP_CACHE_MSYNC_FLAG_UNALIGNED) == 0) { - bool aligned_addr = (((uint32_t)addr % cache_line_size) == 0); - ESP_RETURN_ON_FALSE_ISR(aligned_addr, ESP_ERR_INVALID_ARG, TAG, "start address: 0x%" PRIx32 " is not aligned with cache line size (0x%" PRIx32 ")B", (uint32_t)addr, cache_line_size); + bool aligned_addr = (((uint32_t)addr % cache_line_size) == 0) && ((size % cache_line_size) == 0); + ESP_RETURN_ON_FALSE_ISR(aligned_addr, ESP_ERR_INVALID_ARG, TAG, "start address: 0x%" PRIx32 ", or the size: 0x%" PRIx32 " is(are) not aligned with cache line size (0x%" PRIx32 ")B", (uint32_t)addr, (uint32_t)size, cache_line_size); } s_acquire_mutex_from_task_context(); diff --git a/examples/peripherals/bitscrambler/main/bitscrambler_example_main.c b/examples/peripherals/bitscrambler/main/bitscrambler_example_main.c index 26457f929a..c3cd1e630c 100644 --- a/examples/peripherals/bitscrambler/main/bitscrambler_example_main.c +++ b/examples/peripherals/bitscrambler/main/bitscrambler_example_main.c @@ -31,25 +31,25 @@ void app_main(void) { ESP_LOGI(TAG, "BitScrambler example main"); - uint8_t *data_in, *data_out; - int len = sizeof(testdata); + size_t test_data_len = sizeof(testdata); + uint8_t* result_buf = heap_caps_calloc(test_data_len, 1, MALLOC_CAP_DMA); + assert(result_buf); - data_in = heap_caps_calloc(len, 1, MALLOC_CAP_DMA); - data_out = heap_caps_calloc(len, 1, MALLOC_CAP_DMA); - assert(data_in); - assert(data_out); - memcpy(data_in, testdata, len); + size_t result_buf_size = heap_caps_get_allocated_size(result_buf); + assert(result_buf_size >= test_data_len); bitscrambler_handle_t bs; - ESP_ERROR_CHECK(bitscrambler_loopback_create(&bs, SOC_BITSCRAMBLER_ATTACH_I2S0, len)); + ESP_ERROR_CHECK(bitscrambler_loopback_create(&bs, SOC_BITSCRAMBLER_ATTACH_I2S0, result_buf_size)); ESP_ERROR_CHECK(bitscrambler_load_program(bs, bitscrambler_program_example)); - size_t size; - ESP_ERROR_CHECK(bitscrambler_loopback_run(bs, data_in, len, data_out, len, &size)); + + size_t result_len; + // we can even use a const array as the input data because the DMA can read data from the Flash region + ESP_ERROR_CHECK(bitscrambler_loopback_run(bs, (void*)testdata, test_data_len, result_buf, result_buf_size, &result_len)); bitscrambler_free(bs); - printf("BitScrambler program complete. Input %d, output %d bytes:\n", len, size); - for (int i = 0; i < size; i++) { - printf("%02X ", data_out[i]); + printf("BitScrambler program complete. Input %zu, output %zu bytes:\n", test_data_len, result_len); + for (size_t i = 0; i < result_len; i++) { + printf("%02X ", result_buf[i]); if ((i & 7) == 7) { printf("\n"); }