diff --git a/components/esp_hw_support/dma/async_memcpy_gdma.c b/components/esp_hw_support/dma/async_memcpy_gdma.c index 7376f8fd3b..2538ece16c 100644 --- a/components/esp_hw_support/dma/async_memcpy_gdma.c +++ b/components/esp_hw_support/dma/async_memcpy_gdma.c @@ -321,17 +321,25 @@ static async_memcpy_transaction_t *try_pop_trans_from_idle_queue(async_memcpy_gd static bool check_buffer_aligned(async_memcpy_gdma_context_t *mcp_gdma, void *src, void *dst, size_t n) { bool valid = true; + uint32_t align_mask = 0; if (esp_ptr_external_ram(dst)) { if (mcp_gdma->psram_trans_align) { - valid = valid && (((intptr_t)dst & (mcp_gdma->psram_trans_align - 1)) == 0); - valid = valid && ((n & (mcp_gdma->psram_trans_align - 1)) == 0); + align_mask = mcp_gdma->psram_trans_align - 1; } } else { if (mcp_gdma->sram_trans_align) { - valid = valid && (((intptr_t)dst & (mcp_gdma->sram_trans_align - 1)) == 0); - valid = valid && ((n & (mcp_gdma->sram_trans_align - 1)) == 0); + align_mask = mcp_gdma->sram_trans_align - 1; } } +#if CONFIG_IDF_TARGET_ESP32P4 + uint32_t data_cache_line_mask = cache_hal_get_cache_line_size(CACHE_TYPE_DATA) - 1; + if (data_cache_line_mask > align_mask) { + align_mask = data_cache_line_mask; + } +#endif + // destination address must be cache line aligned + valid = valid && (((uint32_t)dst & align_mask) == 0); + valid = valid && ((n & align_mask) == 0); return valid; } diff --git a/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c b/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c index affd3c6279..fa750b2012 100644 --- a/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c +++ b/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c @@ -25,6 +25,12 @@ #define ALIGN_UP(addr, align) (((addr) + (align)-1) & ~((align)-1)) #define ALIGN_DOWN(size, align) ((size) & ~((align) - 1)) +#if CONFIG_IDF_TARGET_ESP32P4 +#define TEST_MEMCPY_DST_BASE_ALIGN 64 +#else +#define TEST_MEMCPY_DST_BASE_ALIGN 4 +#endif + typedef struct { uint32_t seed; size_t buffer_size; @@ -68,8 +74,9 @@ static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_contex TEST_ASSERT_NOT_NULL_MESSAGE(dst_buf, "allocate destination buffer failed"); // adding extra offset from_addr = src_buf + test_context->offset; - to_addr = dst_buf + test_context->offset; + to_addr = dst_buf; copy_size -= test_context->offset; + copy_size &= ~(test_context->align - 1); printf("...to copy size %zu Bytes, from @%p, to @%p\r\n", copy_size, from_addr, to_addr); printf("fill src buffer with random data\r\n"); @@ -106,7 +113,7 @@ TEST_CASE("memory copy the same buffer with different content", "[async mcp]") async_memcpy_handle_t driver = NULL; TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); uint8_t *sbuf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); - uint8_t *dbuf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); + uint8_t *dbuf = heap_caps_aligned_alloc(TEST_MEMCPY_DST_BASE_ALIGN, 256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); for (int j = 0; j < 20; j++) { TEST_ESP_OK(esp_async_memcpy(driver, dbuf, sbuf, 256, NULL, NULL)); vTaskDelay(pdMS_TO_TICKS(10)); @@ -128,7 +135,7 @@ static void test_memory_copy_one_by_one(async_memcpy_handle_t driver) { uint32_t test_buffer_len[] = {256, 512, 1024, 2048, 4096, 5011}; memcpy_testbench_context_t test_context = { - .align = 4, + .align = TEST_MEMCPY_DST_BASE_ALIGN, }; for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) { @@ -194,7 +201,8 @@ TEST_CASE("memory copy done callback", "[async mcp]") TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); uint8_t *src_buf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); - uint8_t *dst_buf = heap_caps_malloc(256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); + // destination address should aligned to data cache line + uint8_t *dst_buf = heap_caps_aligned_alloc(TEST_MEMCPY_DST_BASE_ALIGN, 256, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); SemaphoreHandle_t sem = xSemaphoreCreateBinary(); TEST_ESP_OK(esp_async_memcpy(driver, dst_buf, src_buf, 256, test_async_memcpy_cb_v1, sem)); @@ -212,8 +220,10 @@ TEST_CASE("memory copy by DMA on the fly", "[async mcp]") TEST_ESP_OK(esp_async_memcpy_install(&config, &driver)); uint32_t test_buffer_len[] = {512, 1024, 2048, 4096, 5011}; - memcpy_testbench_context_t test_context[] = { - {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, + memcpy_testbench_context_t test_context[5] = { + [0 ... 4] = { + .align = TEST_MEMCPY_DST_BASE_ALIGN, + } }; // Aligned case