From 16d25b4a52fc0c3d23e29e4972a0f02d8b0cb9b6 Mon Sep 17 00:00:00 2001
From: Chen Jichang <chenjichang@espressif.com>
Date: Fri, 9 May 2025 17:11:25 +0800
Subject: [PATCH] feat(gdma): support gdma weighted arbitration on ESP32C5

---
 Kconfig                                       |   1 +
 components/esp_hw_support/dma/Kconfig.dma     |  11 ++
 .../esp_hw_support/dma/async_memcpy_gdma.c    |   6 +
 components/esp_hw_support/dma/gdma.c          |  27 +++-
 .../dma/include/esp_private/gdma.h            |  24 ++-
 .../esp_hw_support/include/esp_async_memcpy.h |   1 +
 .../test_apps/dma/main/test_async_memcpy.c    | 142 ++++++++++++++++--
 .../test_apps/dma/pytest_dma.py               |  14 ++
 .../dma/sdkconfig.weighted_arbitration        |   2 +
 .../hal/esp32c5/include/hal/ahb_dma_ll.h      |  52 +++++++
 components/hal/gdma_hal_ahb_v2.c              |  28 +++-
 components/hal/gdma_hal_top.c                 |   9 +-
 components/hal/include/hal/gdma_hal.h         |  12 +-
 .../esp32c5/include/soc/Kconfig.soc_caps.in   |   4 +
 components/soc/esp32c5/include/soc/soc_caps.h |   1 +
 15 files changed, 317 insertions(+), 17 deletions(-)
 create mode 100644 components/esp_hw_support/test_apps/dma/sdkconfig.weighted_arbitration

diff --git a/Kconfig b/Kconfig
index 599e23e2db..72fa9f8853 100644
--- a/Kconfig
+++ b/Kconfig
@@ -705,3 +705,4 @@ mainmenu "Espressif IoT Development Framework Configuration"
             - CONFIG_ESP_WIFI_ENABLE_ROAMING_APP
             - CONFIG_USB_HOST_EXT_PORT_RESET_ATTEMPTS
             - CONFIG_LIBC_PICOLIBC
+            - CONFIG_GDMA_ENABLE_WEIGHTED_ARBITRATION
diff --git a/components/esp_hw_support/dma/Kconfig.dma b/components/esp_hw_support/dma/Kconfig.dma
index d95212df71..3e89842cc0 100644
--- a/components/esp_hw_support/dma/Kconfig.dma
+++ b/components/esp_hw_support/dma/Kconfig.dma
@@ -43,6 +43,17 @@ menu "GDMA Configurations"
             If this option is enabled, ALL GDMA channel's ISR handlers should be placed in IRAM, which is a overkill.
             It's recommend to set the "isr_cache_safe" in the "gdma_channel_alloc_config_t".
             Then other GDMA channels won't be influenced.
+
+    config GDMA_ENABLE_WEIGHTED_ARBITRATION
+        depends on SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION && IDF_EXPERIMENTAL_FEATURES
+        bool "GDMA enable weighted arbitration (Experimental)"
+        default n
+        help
+            Whether to enable the weighted arbitration for GDMA driver.
+            The default weight of each channel is 1. Set the weight of each channel before starting any transmission.
+            If this option is enabled, the buffers must be aligned to the burst size.
+
+
 endmenu # GDMA Configurations
 
 menu "DW_GDMA Configurations"
diff --git a/components/esp_hw_support/dma/async_memcpy_gdma.c b/components/esp_hw_support/dma/async_memcpy_gdma.c
index 527e0ff79c..109e77bf71 100644
--- a/components/esp_hw_support/dma/async_memcpy_gdma.c
+++ b/components/esp_hw_support/dma/async_memcpy_gdma.c
@@ -145,6 +145,12 @@ static esp_err_t esp_async_memcpy_install_gdma_template(const async_memcpy_confi
     gdma_apply_strategy(mcp_gdma->tx_channel, &strategy_cfg);
     gdma_apply_strategy(mcp_gdma->rx_channel, &strategy_cfg);
 
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+    if(config->weight){
+        ESP_GOTO_ON_ERROR(gdma_set_weight(mcp_gdma->rx_channel, config->weight), err, TAG, "Set GDMA rx channel weight failed");
+        ESP_GOTO_ON_ERROR(gdma_set_weight(mcp_gdma->tx_channel, config->weight), err, TAG, "Set GDMA tx channel weight failed");
+    }
+#endif
     gdma_transfer_config_t transfer_cfg = {
         .max_data_burst_size = config->dma_burst_size,
         .access_ext_mem = true, // allow to do memory copy from/to external memory
diff --git a/components/esp_hw_support/dma/gdma.c b/components/esp_hw_support/dma/gdma.c
index f222e9817e..ed99659d43 100644
--- a/components/esp_hw_support/dma/gdma.c
+++ b/components/esp_hw_support/dma/gdma.c
@@ -173,7 +173,10 @@ search_done:
         alloc_rx_channel->base.del = gdma_del_rx_channel; // set channel deletion function
         *ret_chan = &alloc_rx_channel->base; // return the installed channel
     }
-
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+    // set 1 as default weight, can be overwritten by user
+    gdma_set_weight(*ret_chan, 1);
+#endif
     (*ret_chan)->spinlock = (portMUX_TYPE)portMUX_INITIALIZER_UNLOCKED;
     ESP_LOGD(TAG, "new %s channel (%d,%d) at %p", (config->direction == GDMA_CHANNEL_DIRECTION_TX) ? "tx" : "rx",
              group->group_id, pair->pair_id, *ret_chan);
@@ -370,6 +373,11 @@ esp_err_t gdma_config_transfer(gdma_channel_handle_t dma_chan, const gdma_transf
     gdma_hal_enable_burst(hal, pair->pair_id, dma_chan->direction, en_data_burst, en_desc_burst);
     if (en_data_burst) {
         gdma_hal_set_burst_size(hal, pair->pair_id, dma_chan->direction, max_data_burst_size);
+#if CONFIG_GDMA_ENABLE_WEIGHTED_ARBITRATION
+        // due to hardware limitation, if weighted arbitration is enabled, the data must be aligned to burst size
+        int_mem_alignment = MAX(int_mem_alignment, max_data_burst_size);
+        ext_mem_alignment = MAX(ext_mem_alignment, max_data_burst_size);
+#endif
     }
 
 #if GDMA_LL_AHB_RX_BURST_NEEDS_ALIGNMENT
@@ -437,6 +445,20 @@ esp_err_t gdma_set_priority(gdma_channel_handle_t dma_chan, uint32_t priority)
     return ESP_OK;
 }
 
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+esp_err_t gdma_set_weight(gdma_channel_handle_t dma_chan, uint32_t weight)
+{
+    ESP_RETURN_ON_FALSE(dma_chan && weight <= GDMA_LL_CHANNEL_MAX_WEIGHT, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
+    gdma_pair_t *pair = dma_chan->pair;
+    gdma_group_t *group = pair->group;
+    gdma_hal_context_t *hal = &group->hal;
+
+    gdma_hal_set_weight(hal, pair->pair_id, dma_chan->direction, weight);
+
+    return ESP_OK;
+}
+# endif
+
 esp_err_t gdma_register_tx_event_callbacks(gdma_channel_handle_t dma_chan, gdma_tx_event_callbacks_t *cbs, void *user_data)
 {
     ESP_RETURN_ON_FALSE(dma_chan && cbs && dma_chan->direction == GDMA_CHANNEL_DIRECTION_TX, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
@@ -636,6 +658,9 @@ static gdma_group_t *gdma_acquire_group_handle(int group_id, void (*hal_init)(gd
         }
         gdma_hal_config_t config = {
             .group_id = group_id,
+#if CONFIG_GDMA_ENABLE_WEIGHTED_ARBITRATION
+            .flags.enable_weighted_arbitration = true,
+#endif
         };
         hal_init(&group->hal, &config);
         ESP_LOGD(TAG, "new group (%d) at %p", group_id, group);
diff --git a/components/esp_hw_support/dma/include/esp_private/gdma.h b/components/esp_hw_support/dma/include/esp_private/gdma.h
index af595f7c62..a3d49d7f6c 100644
--- a/components/esp_hw_support/dma/include/esp_private/gdma.h
+++ b/components/esp_hw_support/dma/include/esp_private/gdma.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2020-2025 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -462,6 +462,28 @@ esp_err_t gdma_config_crc_calculator(gdma_channel_handle_t dma_chan, const gdma_
 esp_err_t gdma_crc_get_result(gdma_channel_handle_t dma_chan, uint32_t *result);
 #endif // SOC_GDMA_SUPPORT_CRC
 
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+/**
+ * @brief Set GDMA channel weight (0 ~ 15), default weight is 1
+ *
+ * @note Once weighted arbitration is enabled, the arbitrator allocates a number of tokens to each channel according to its weight.
+ *       You need to set a weight for each channel. If the weight is 0, requests from that channel will no longer be served.
+ * @note If channels have different weights, the channel with a larger weight can get a larger bandwidth.
+ *       e.g. if channel A has weight 2, channel B has weight 3 and channel C has weight 4, the percentage of bandwidth allocated to channel A is 2/9,
+ *       the percentage of bandwidth allocated to channel B is 3/9, and the percentage of bandwidth allocated to channel C is 4/9.
+ * @note Weighted arbitration is different from priority arbitration. "Weight" is only considered after "priority" has been compared:
+ *       after the priority comparison, the arbitrator checks whether the channel still has unused tokens.
+ *
+ * @param[in] dma_chan GDMA channel handle, allocated by `gdma_new_channel`
+ * @param[in] weight Weight of the GDMA channel, in range [0, GDMA_LL_CHANNEL_MAX_WEIGHT]. A larger weight gets a larger share of the bandwidth.
+ * @return
+ *      - ESP_OK: Set GDMA channel weight successfully
+ *      - ESP_ERR_INVALID_ARG: Set GDMA channel weight failed because of invalid argument. e.g. weight out of range [0,GDMA_LL_CHANNEL_MAX_WEIGHT]
+ *      - ESP_FAIL: Set GDMA channel weight failed because of other error
+ */
+esp_err_t gdma_set_weight(gdma_channel_handle_t dma_chan, uint32_t weight);
+#endif // SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+
 /****************************************************************************************
  * Deprecated APIs (will be removed in esp-idf 6.0)
  ****************************************************************************************/
diff --git a/components/esp_hw_support/include/esp_async_memcpy.h b/components/esp_hw_support/include/esp_async_memcpy.h
index d0ad260055..fd05b5ebec 100644
--- a/components/esp_hw_support/include/esp_async_memcpy.h
+++ b/components/esp_hw_support/include/esp_async_memcpy.h
@@ -51,6 +51,7 @@ typedef bool (*async_memcpy_isr_cb_t)(async_memcpy_handle_t mcp_hdl, async_memcp
  */
 typedef struct {
     uint32_t backlog;          /*!< Maximum number of transactions that can be prepared in the background */
+    uint32_t weight;           /*!< Weight of async memcpy dma channel, higher weight means higher average bandwidth */
     size_t sram_trans_align __attribute__((deprecated)); /*!< DMA transfer alignment (both in size and address) for SRAM memory */
     union {
         size_t psram_trans_align __attribute__((deprecated)); /*!< DMA transfer alignment (both in size and address) for PSRAM memory */
diff --git a/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c b/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c
index 7374d0589c..4246b4009b 100644
--- a/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c
+++ b/components/esp_hw_support/test_apps/dma/main/test_async_memcpy.c
@@ -36,6 +36,7 @@ typedef struct {
     uint32_t dst_offset;
     bool src_in_psram;
     bool dst_in_psram;
+    bool src_dst_same;
 } memcpy_testbench_context_t;
 
 static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_context)
@@ -51,10 +52,13 @@ static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_contex
     uint32_t mem_caps = test_context->src_in_psram ? MALLOC_CAP_SPIRAM | MALLOC_CAP_DMA | MALLOC_CAP_8BIT :  MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA | MALLOC_CAP_8BIT ;
     src_buf = heap_caps_aligned_calloc(test_context->align, 1, buffer_size, mem_caps);
     TEST_ASSERT_NOT_NULL(src_buf);
-
-    mem_caps = test_context->dst_in_psram ? MALLOC_CAP_SPIRAM | MALLOC_CAP_DMA | MALLOC_CAP_8BIT :  MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA | MALLOC_CAP_8BIT ;
-    dst_buf = heap_caps_aligned_calloc(test_context->align, 1, buffer_size, mem_caps);
-    TEST_ASSERT_NOT_NULL(dst_buf);
+    if(test_context->src_dst_same) {
+        dst_buf = src_buf;
+    } else {
+        mem_caps = test_context->dst_in_psram ? MALLOC_CAP_SPIRAM | MALLOC_CAP_DMA | MALLOC_CAP_8BIT :  MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA | MALLOC_CAP_8BIT ;
+        dst_buf = heap_caps_aligned_calloc(test_context->align, 1, buffer_size, mem_caps);
+        TEST_ASSERT_NOT_NULL(dst_buf);
+    }
 
     // adding extra offset
     from_addr = src_buf + test_context->src_offset;
@@ -87,10 +91,10 @@ static void async_memcpy_verify_and_clear_testbench(uint32_t copy_size, uint8_t
     free(dst_buf);
 }
 
-static void test_memory_copy_with_same_buffer(async_memcpy_handle_t driver)
+static void test_memory_copy_with_same_buffer(async_memcpy_handle_t driver, async_memcpy_config_t *config)
 {
-    uint8_t *sbuf = heap_caps_calloc(1, 256, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
-    uint8_t *dbuf = heap_caps_calloc(1, 256, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    uint8_t *sbuf = heap_caps_aligned_calloc(config->dma_burst_size, 1, 256, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
+    uint8_t *dbuf = heap_caps_aligned_calloc(config->dma_burst_size, 1, 256, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
     TEST_ASSERT_NOT_NULL(sbuf);
     TEST_ASSERT_NOT_NULL(dbuf);
 
@@ -118,21 +122,21 @@ TEST_CASE("memory copy the same buffer with different content", "[async mcp]")
 #if SOC_AHB_GDMA_SUPPORTED
     printf("Testing memcpy by AHB GDMA\r\n");
     TEST_ESP_OK(esp_async_memcpy_install_gdma_ahb(&config, &driver));
-    test_memory_copy_with_same_buffer(driver);
+    test_memory_copy_with_same_buffer(driver, &config);
     TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
 #endif // SOC_AHB_GDMA_SUPPORTED
 
 #if SOC_AXI_GDMA_SUPPORTED
     printf("Testing memcpy by AXI GDMA\r\n");
     TEST_ESP_OK(esp_async_memcpy_install_gdma_axi(&config, &driver));
-    test_memory_copy_with_same_buffer(driver);
+    test_memory_copy_with_same_buffer(driver, &config);
     TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
 #endif // SOC_AXI_GDMA_SUPPORTED
 
 #if SOC_CP_DMA_SUPPORTED
     printf("Testing memcpy by CP DMA\r\n");
     TEST_ESP_OK(esp_async_memcpy_install_cpdma(&config, &driver));
-    test_memory_copy_with_same_buffer(driver);
+    test_memory_copy_with_same_buffer(driver, &config);
     TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
 #endif // SOC_CP_DMA_SUPPORTED
 }
@@ -243,7 +247,7 @@ TEST_CASE("memory copy with dest address unaligned", "[async mcp]")
     TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
 #endif // SOC_CP_DMA_SUPPORTED
 
-#if SOC_AHB_GDMA_SUPPORTED && !GDMA_LL_AHB_RX_BURST_NEEDS_ALIGNMENT
+#if SOC_AHB_GDMA_SUPPORTED && !GDMA_LL_AHB_RX_BURST_NEEDS_ALIGNMENT && !CONFIG_GDMA_ENABLE_WEIGHTED_ARBITRATION
     printf("Testing memcpy by AHB GDMA\r\n");
     TEST_ESP_OK(esp_async_memcpy_install_gdma_ahb(&driver_config, &driver));
     test_memcpy_with_dest_addr_unaligned(driver, false, false);
@@ -253,7 +257,7 @@ TEST_CASE("memory copy with dest address unaligned", "[async mcp]")
     TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
 #endif // SOC_AHB_GDMA_SUPPORTED
 
-#if SOC_AXI_GDMA_SUPPORTED
+#if SOC_AXI_GDMA_SUPPORTED && !CONFIG_GDMA_ENABLE_WEIGHTED_ARBITRATION
     printf("Testing memcpy by AXI GDMA\r\n");
     TEST_ESP_OK(esp_async_memcpy_install_gdma_axi(&driver_config, &driver));
     test_memcpy_with_dest_addr_unaligned(driver, false, false);
@@ -377,3 +381,117 @@ TEST_CASE("memory copy performance 40KB: PSRAM->PSRAM", "[async mcp]")
 #endif // SOC_AXI_GDMA_SUPPORTED && SOC_AXI_GDMA_SUPPORT_PSRAM
 }
 #endif
+
+#if CONFIG_GDMA_ENABLE_WEIGHTED_ARBITRATION
+typedef struct {
+    SemaphoreHandle_t sem;
+    int64_t elapse_us;
+} test_weighted_arb_context_t;
+static IRAM_ATTR bool test_weighted_arb_isr_cb(async_memcpy_handle_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
+{
+    test_weighted_arb_context_t *ctx = (test_weighted_arb_context_t *)cb_args;
+    BaseType_t high_task_wakeup = pdFALSE;
+    ctx->elapse_us = ccomp_timer_get_time();
+    xSemaphoreGiveFromISR(ctx->sem, &high_task_wakeup);
+    return high_task_wakeup == pdTRUE;
+}
+
+static void memcpy_weighted_arb_test(async_memcpy_handle_t driver[2], size_t burst_size, uint32_t buffer_size, bool buffer_in_psram)
+{
+    SemaphoreHandle_t sem[2] = {xSemaphoreCreateBinary(),xSemaphoreCreateBinary()};
+    int64_t elapse_us[2] = {0};
+    float throughput[2] = {0.0};
+
+    memcpy_testbench_context_t test_context = {
+        .align = burst_size,
+        .buffer_size = buffer_size,
+        .src_dst_same = !buffer_in_psram, // if buffer is in PSRAM, no memory size limitation
+        .src_in_psram = buffer_in_psram,
+        .dst_in_psram = buffer_in_psram,
+    };
+    async_memcpy_setup_testbench(&test_context);
+    test_weighted_arb_context_t ctx[2] = {
+        [0] = {
+            .sem = sem[0],
+        },
+        [1] = {
+            .sem = sem[1],
+        }
+    };
+
+    ccomp_timer_start();
+    TEST_ESP_OK(esp_async_memcpy(driver[0], test_context.to_addr, test_context.from_addr, test_context.copy_size, test_weighted_arb_isr_cb, &ctx[0]));
+    TEST_ESP_OK(esp_async_memcpy(driver[1], test_context.to_addr, test_context.from_addr, test_context.copy_size, test_weighted_arb_isr_cb, &ctx[1]));
+
+    // get channel_1 spent time
+    TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem[1], pdMS_TO_TICKS(1000)));
+    elapse_us[1] = ctx[1].elapse_us;
+
+    // wait for channel_0 done, keep channel_1 busy to do arbitration
+    while(xSemaphoreTake(sem[0], 0) == pdFALSE) {
+        TEST_ESP_OK(esp_async_memcpy(driver[1], test_context.to_addr, test_context.from_addr, test_context.copy_size, test_weighted_arb_isr_cb, &ctx[1]));
+        TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem[1], pdMS_TO_TICKS(1000)));
+    }
+    // get channel_0 spent time
+    elapse_us[0] = ctx[0].elapse_us;
+
+    ccomp_timer_stop();
+    throughput[0] = (float)test_context.buffer_size * 1e6  / 1024 / 1024 / elapse_us[0];
+    IDF_LOG_PERFORMANCE("DMA0_COPY", "%.2f MB/s, size: %zu Bytes", throughput[0], test_context.buffer_size);
+
+    throughput[1] = (float)test_context.buffer_size * 1e6  / 1024 / 1024 / elapse_us[1];
+    IDF_LOG_PERFORMANCE("DMA1_COPY", "%.2f MB/s, size: %zu Bytes", throughput[1], test_context.buffer_size);
+
+    // the bandwidth of channel_1 should be more than 10 times that of channel_0
+    TEST_ASSERT_EQUAL(throughput[1] / throughput[0] > 10, true);
+
+    async_memcpy_verify_and_clear_testbench(test_context.copy_size, test_context.src_buf, buffer_in_psram ? test_context.dst_buf : NULL,
+                                            test_context.from_addr, test_context.to_addr);
+
+    vSemaphoreDelete(sem[0]);
+    vSemaphoreDelete(sem[1]);
+}
+
+TEST_CASE("GDMA M2M Weighted Arbitration Test SRAM->SRAM", "[GDMA][M2M][async mcp]")
+{
+    async_memcpy_config_t driver_config = {
+        .backlog = TEST_ASYNC_MEMCPY_BENCH_COUNTS,
+        .dma_burst_size = 64,
+    };
+
+    async_memcpy_handle_t driver[2] = {NULL};
+
+#if SOC_AHB_GDMA_SUPPORTED
+    driver_config.weight = 1;
+    TEST_ESP_OK(esp_async_memcpy_install_gdma_ahb(&driver_config, &driver[0]));
+    driver_config.weight = 15;
+    TEST_ESP_OK(esp_async_memcpy_install_gdma_ahb(&driver_config, &driver[1]));
+    memcpy_weighted_arb_test(driver, driver_config.dma_burst_size, 200 * 1024, false);
+    TEST_ESP_OK(esp_async_memcpy_uninstall(driver[0]));
+    TEST_ESP_OK(esp_async_memcpy_uninstall(driver[1]));
+#endif // SOC_AHB_GDMA_SUPPORTED
+}
+
+#if SOC_SPIRAM_SUPPORTED
+TEST_CASE("GDMA M2M Weighted Arbitration Test PSRAM->PSRAM", "[GDMA][M2M][async mcp]")
+{
+    [[maybe_unused]] async_memcpy_config_t driver_config = {
+        .backlog = TEST_ASYNC_MEMCPY_BENCH_COUNTS,
+        .dma_burst_size = 32,   // PSRAM bandwidth may be not enough if burst size is 64
+    };
+
+    [[maybe_unused]] async_memcpy_handle_t driver[2] = {NULL};
+
+#if SOC_AHB_GDMA_SUPPORTED && SOC_AHB_GDMA_SUPPORT_PSRAM
+    driver_config.weight = 1;
+    TEST_ESP_OK(esp_async_memcpy_install_gdma_ahb(&driver_config, &driver[0]));
+    driver_config.weight = 15;
+    TEST_ESP_OK(esp_async_memcpy_install_gdma_ahb(&driver_config, &driver[1]));
+    memcpy_weighted_arb_test(driver, driver_config.dma_burst_size, 200 * 1024, true);
+    TEST_ESP_OK(esp_async_memcpy_uninstall(driver[0]));
+    TEST_ESP_OK(esp_async_memcpy_uninstall(driver[1]));
+#endif // SOC_AHB_GDMA_SUPPORTED && SOC_AHB_GDMA_SUPPORT_PSRAM
+}
+#endif // SOC_SPIRAM_SUPPORTED
+
+#endif // CONFIG_GDMA_ENABLE_WEIGHTED_ARBITRATION
diff --git a/components/esp_hw_support/test_apps/dma/pytest_dma.py b/components/esp_hw_support/test_apps/dma/pytest_dma.py
index e70adf3476..a009ce6d50 100644
--- a/components/esp_hw_support/test_apps/dma/pytest_dma.py
+++ b/components/esp_hw_support/test_apps/dma/pytest_dma.py
@@ -3,6 +3,7 @@
 import pytest
 from pytest_embedded import Dut
 from pytest_embedded_idf.utils import idf_parametrize
+from pytest_embedded_idf.utils import soc_filtered_targets
 
 
 @pytest.mark.generic
@@ -33,3 +34,16 @@ def test_dma(dut: Dut) -> None:
 @idf_parametrize('target', ['esp32s3'], indirect=['target'])
 def test_dma_psram(dut: Dut) -> None:
     dut.run_all_single_board_cases(reset=True)
+
+
+@pytest.mark.generic
+@pytest.mark.parametrize(
+    'config',
+    [
+        'weighted_arbitration',
+    ],
+    indirect=True,
+)
+@idf_parametrize('target', soc_filtered_targets('SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION == 1'), indirect=['target'])
+def test_dma_weighted_arbitration(dut: Dut) -> None:
+    dut.run_all_single_board_cases(reset=True)
diff --git a/components/esp_hw_support/test_apps/dma/sdkconfig.weighted_arbitration b/components/esp_hw_support/test_apps/dma/sdkconfig.weighted_arbitration
new file mode 100644
index 0000000000..2bf15839c9
--- /dev/null
+++ b/components/esp_hw_support/test_apps/dma/sdkconfig.weighted_arbitration
@@ -0,0 +1,2 @@
+CONFIG_GDMA_ENABLE_WEIGHTED_ARBITRATION=y
+CONFIG_IDF_EXPERIMENTAL_FEATURES=y
diff --git a/components/hal/esp32c5/include/hal/ahb_dma_ll.h b/components/hal/esp32c5/include/hal/ahb_dma_ll.h
index 787edf0ec5..54675abfd2 100644
--- a/components/hal/esp32c5/include/hal/ahb_dma_ll.h
+++ b/components/hal/esp32c5/include/hal/ahb_dma_ll.h
@@ -22,6 +22,7 @@ extern "C" {
 #define AHB_DMA_LL_GET_HW(id) (((id) == 0) ? (&AHB_DMA) : NULL)
 
 #define GDMA_LL_CHANNEL_MAX_PRIORITY 5 // supported priority levels: [0,5]
+#define GDMA_LL_CHANNEL_MAX_WEIGHT 15  // supported weight levels: [0,15]
 
 #define GDMA_LL_RX_EVENT_MASK       (0x7F)
 #define GDMA_LL_TX_EVENT_MASK       (0x3F)
@@ -138,6 +139,25 @@ static inline void ahb_dma_ll_set_default_memory_range(ahb_dma_dev_t *dev)
     dev->intr_mem_end_addr.val = 0x44000000;
 }
 
+/**
+ * @brief Enable or disable the weighted arbitration for AHB-DMA
+ */
+static inline void ahb_dma_ll_enable_weighted_arb(ahb_dma_dev_t *dev, bool enable)
+{
+    dev->weight_en.weight_en = enable;
+}
+
+/**
+ * @brief Set the weighted arbitration timeout for AHB-DMA
+ *
+ * @param timeout Timeout in AHB bus clock cycles, valid range is [1, 65535]
+ */
+static inline void ahb_dma_ll_set_weighted_arb_timeout(ahb_dma_dev_t *dev, uint32_t timeout)
+{
+    HAL_ASSERT(timeout != 0 && timeout <= 65535);
+    dev->arb_timeout.arb_timeout_num = timeout;
+}
+
 ///////////////////////////////////// RX /////////////////////////////////////////
 /**
  * @brief Get DMA RX channel interrupt status word
@@ -391,6 +411,22 @@ static inline void ahb_dma_ll_rx_enable_etm_task(ahb_dma_dev_t *dev, uint32_t ch
     dev->channel[channel].in.in_conf0.in_etm_en_chn = enable;
 }
 
+/**
+ * @brief Enable or disable the weighted arbitration optimization for a DMA RX channel
+ */
+static inline void ahb_dma_ll_rx_enable_weighted_arb_opt(ahb_dma_dev_t *dev, uint32_t channel, bool enable)
+{
+    dev->in_crc_arb[channel].arb_weight_opt.rx_arb_weight_opt_dis_chn = !enable;
+}
+
+/**
+ * @brief Set the weight for DMA RX channel
+ */
+static inline void ahb_dma_ll_rx_set_weight(ahb_dma_dev_t *dev, uint32_t channel, uint32_t weight)
+{
+    dev->in_crc_arb[channel].ch_arb_weight.rx_arb_weight_value_chn = weight;
+}
+
 ///////////////////////////////////// TX /////////////////////////////////////////
 /**
  * @brief Get DMA TX channel interrupt status word
@@ -642,6 +678,22 @@ static inline void ahb_dma_ll_tx_enable_etm_task(ahb_dma_dev_t *dev, uint32_t ch
     dev->channel[channel].out.out_conf0.out_etm_en_chn = enable;
 }
 
+/**
+ * @brief Enable or disable the weighted arbitration optimization for a DMA TX channel
+ */
+static inline void ahb_dma_ll_tx_enable_weighted_arb_opt(ahb_dma_dev_t *dev, uint32_t channel, bool enable)
+{
+    dev->out_crc_arb[channel].arb_weight_opt.tx_arb_weight_opt_dis_chn = !enable;
+}
+
+/**
+ * @brief Set the weight for DMA TX channel
+ */
+static inline void ahb_dma_ll_tx_set_weight(ahb_dma_dev_t *dev, uint32_t channel, uint32_t weight)
+{
+    dev->out_crc_arb[channel].ch_arb_weight.tx_arb_weight_value_chn = weight;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/components/hal/gdma_hal_ahb_v2.c b/components/hal/gdma_hal_ahb_v2.c
index 0acb2b21c7..2fe449019e 100644
--- a/components/hal/gdma_hal_ahb_v2.c
+++ b/components/hal/gdma_hal_ahb_v2.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2022-2025 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -232,6 +232,17 @@ void gdma_ahb_hal_enable_etm_task(gdma_hal_context_t *hal, int chan_id, gdma_cha
 }
 #endif // SOC_GDMA_SUPPORT_ETM
 
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+void gdma_ahb_hal_set_weight(gdma_hal_context_t *hal, int chan_id, gdma_channel_direction_t dir, uint32_t weight)
+{
+    if (dir == GDMA_CHANNEL_DIRECTION_RX) {
+        ahb_dma_ll_rx_set_weight(hal->ahb_dma_dev, chan_id, weight);
+    } else {
+        ahb_dma_ll_tx_set_weight(hal->ahb_dma_dev, chan_id, weight);
+    }
+}
+#endif // SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+
 void gdma_ahb_hal_init(gdma_hal_context_t *hal, const gdma_hal_config_t *config)
 {
     hal->ahb_dma_dev = AHB_DMA_LL_GET_HW(config->group_id - GDMA_LL_AHB_GROUP_START_ID);
@@ -261,5 +272,20 @@ void gdma_ahb_hal_init(gdma_hal_context_t *hal, const gdma_hal_config_t *config)
 #if GDMA_LL_AHB_BURST_SIZE_ADJUSTABLE
     hal->set_burst_size = gdma_ahb_hal_set_burst_size;
 #endif // GDMA_LL_AHB_BURST_SIZE_ADJUSTABLE
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+    hal->set_weight = gdma_ahb_hal_set_weight;
+    if (config->flags.enable_weighted_arbitration) {
+        ahb_dma_ll_enable_weighted_arb(hal->ahb_dma_dev, true);
+        // always enable the weighted arbitration optimization on every TX and RX channel
+        for (int i = 0; i < SOC_GDMA_PAIRS_PER_GROUP_MAX; i++) {
+            ahb_dma_ll_tx_enable_weighted_arb_opt(hal->ahb_dma_dev, i, true);
+            ahb_dma_ll_rx_enable_weighted_arb_opt(hal->ahb_dma_dev, i, true);
+        }
+        // set the timeout to 2000 AHB bus cycles, so that within each time period the channels can consume almost all of their tokens
+        ahb_dma_ll_set_weighted_arb_timeout(hal->ahb_dma_dev, 2000);
+    } else {
+        ahb_dma_ll_enable_weighted_arb(hal->ahb_dma_dev, false);
+    }
+#endif // SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
     ahb_dma_ll_set_default_memory_range(hal->ahb_dma_dev);
 }
diff --git a/components/hal/gdma_hal_top.c b/components/hal/gdma_hal_top.c
index d41eb342cb..7df09676f7 100644
--- a/components/hal/gdma_hal_top.c
+++ b/components/hal/gdma_hal_top.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2020-2025 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -120,3 +120,10 @@ void gdma_hal_enable_etm_task(gdma_hal_context_t *hal, int chan_id, gdma_channel
     hal->enable_etm_task(hal, chan_id, dir, en_or_dis);
 }
 #endif // SOC_GDMA_SUPPORT_ETM
+
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+void gdma_hal_set_weight(gdma_hal_context_t *hal, int chan_id, gdma_channel_direction_t dir, uint32_t weight)
+{
+    hal->set_weight(hal, chan_id, dir, weight);
+}
+#endif // SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
diff --git a/components/hal/include/hal/gdma_hal.h b/components/hal/include/hal/gdma_hal.h
index 3231cc583e..a7e7a03834 100644
--- a/components/hal/include/hal/gdma_hal.h
+++ b/components/hal/include/hal/gdma_hal.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
+ * SPDX-FileCopyrightText: 2020-2025 Espressif Systems (Shanghai) CO LTD
  *
  * SPDX-License-Identifier: Apache-2.0
  */
@@ -34,6 +34,9 @@ typedef struct gdma_hal_context_t gdma_hal_context_t;
  */
 typedef struct {
     int group_id;  /*!< GDMA group ID */
+    struct {
+        uint32_t enable_weighted_arbitration: 1; /*!< Enable weighted arbitration */
+    } flags; /*!< Extra configuration flags */
 } gdma_hal_config_t;
 
 typedef struct {
@@ -96,6 +99,9 @@ struct gdma_hal_context_t {
 #if SOC_GDMA_SUPPORT_ETM
     void (*enable_etm_task)(gdma_hal_context_t *hal, int chan_id, gdma_channel_direction_t dir, bool en_or_dis); /// Enable the ETM task
 #endif // SOC_GDMA_SUPPORT_ETM
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+    void (*set_weight)(gdma_hal_context_t *hal, int chan_id, gdma_channel_direction_t dir, uint32_t weight);  /// Set the channel weight
+#endif // SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
 };
 
 void gdma_hal_deinit(gdma_hal_context_t *hal);
@@ -151,6 +157,10 @@ uint32_t gdma_hal_get_crc_result(gdma_hal_context_t *hal, int chan_id, gdma_chan
 void gdma_hal_enable_etm_task(gdma_hal_context_t *hal, int chan_id, gdma_channel_direction_t dir, bool en_or_dis);
 #endif
 
+#if SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+void gdma_hal_set_weight(gdma_hal_context_t *hal, int chan_id, gdma_channel_direction_t dir, uint32_t weight);
+#endif //SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/components/soc/esp32c5/include/soc/Kconfig.soc_caps.in b/components/soc/esp32c5/include/soc/Kconfig.soc_caps.in
index ffacb5385f..bec4c3a66f 100644
--- a/components/soc/esp32c5/include/soc/Kconfig.soc_caps.in
+++ b/components/soc/esp32c5/include/soc/Kconfig.soc_caps.in
@@ -507,6 +507,10 @@ config SOC_AHB_GDMA_SUPPORT_PSRAM
     bool
     default y
 
+config SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION
+    bool
+    default y
+
 config SOC_ETM_GROUPS
     int
     default 1
diff --git a/components/soc/esp32c5/include/soc/soc_caps.h b/components/soc/esp32c5/include/soc/soc_caps.h
index 1b59fb4924..bf5deb5607 100644
--- a/components/soc/esp32c5/include/soc/soc_caps.h
+++ b/components/soc/esp32c5/include/soc/soc_caps.h
@@ -195,6 +195,7 @@
 #define SOC_GDMA_SUPPORT_ETM            1
 #define SOC_GDMA_SUPPORT_SLEEP_RETENTION    1
 #define SOC_AHB_GDMA_SUPPORT_PSRAM 1
+#define SOC_GDMA_SUPPORT_WEIGHTED_ARBITRATION   1
 
 /*-------------------------- ETM CAPS --------------------------------------*/
 #define SOC_ETM_GROUPS                  1U  // Number of ETM groups