diff --git a/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c b/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c index 7406cdbccb..068668e965 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c +++ b/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c @@ -60,6 +60,11 @@ #include "soc/hp_system_reg.h" #endif +#if SOC_CPU_HAS_HWLOOP +#include "riscv/csr.h" +#include "riscv/csr_hwlp.h" +#endif + #if ( SOC_CPU_COPROC_NUM > 0 ) #include "esp_private/panic_internal.h" @@ -125,9 +130,23 @@ StackType_t *xIsrStackBottom[portNUM_PROCESSORS] = {0}; BaseType_t xPortStartScheduler(void) { #if ( SOC_CPU_COPROC_NUM > 0 ) + +#if SOC_CPU_HAS_FPU /* Disable FPU so that the first task to use it will trigger an exception */ rv_utils_disable_fpu(); -#endif +#endif /* SOC_CPU_HAS_FPU */ + +#if SOC_CPU_HAS_PIE + /* Similarly, disable PIE */ + rv_utils_disable_pie(); +#endif /* SOC_CPU_HAS_PIE */ + +#if SOC_CPU_HAS_HWLOOP + /* Initialize the Hardware loop feature */ + RV_WRITE_CSR(CSR_HWLP_STATE_REG, HWLP_INITIAL_STATE); +#endif /* SOC_CPU_HAS_HWLOOP */ +#endif /* ( SOC_CPU_COPROC_NUM > 0 ) */ + /* Initialize all kernel state tracking variables */ BaseType_t coreID = xPortGetCoreID(); port_uxInterruptNesting[coreID] = 0; @@ -826,7 +845,7 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop * @param coreid Current core * @param coproc Coprocessor to save context of * - * @returns Coprocessor former owner's save are, can be NULL is there was no owner yet, can be -1 if + * @returns Coprocessor former owner's save area, can be NULL if there was no owner yet, can be -1 if * the former owner is the same as the new owner. 
*/ RvCoprocSaveArea* pxPortUpdateCoprocOwner(int coreid, int coproc, StaticTask_t* owner) diff --git a/components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S b/components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S index 067c846c3e..b737e58d7c 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S +++ b/components/freertos/FreeRTOS-Kernel/portable/riscv/portasm.S @@ -92,7 +92,7 @@ rtos_save_\name\()_coproc: \save_coproc_regs a0 rtos_save_\name\()_coproc_nosave: #if ( configNUM_CORES > 1 ) - /* Pin current task to current core */ + /* Pin current task to current core, s1 has pxCurrentTCBs */ mv a0, s1 csrr a1, mhartid call vPortTaskPinToCore @@ -184,7 +184,33 @@ rtos_save_\name\()_coproc_norestore: .endm -generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs, hwlp_restore_regs + /** + * @brief Restore the HWLP registers contained in the dedicated save area if the given task ever used it. + * This routine sets the HWLP context to clean in any case. 
+ * + * @param a0 StaticTask address for the newly scheduled task + */ +hwlp_restore_if_used: + addi sp, sp, -16 + sw ra, (sp) + /* Check if the HWLP was in use beforehand */ + li a1, 0 + li a2, HWLP_COPROC_IDX + call pxPortGetCoprocArea + /* Get the enable flags from the coprocessor save area */ + lw a1, RV_COPROC_ENABLE(a0) + /* Test the HWLP enable bit: if it is clear, skip the register restore */ + andi a2, a1, 1 << HWLP_COPROC_IDX + beqz a2, _hwlp_restore_never_used + /* Enable bit was set, restore the coprocessor context */ + lw a0, RV_COPROC_SA+HWLP_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[HWLP_COPROC_IDX] */ + hwlp_restore_regs a0 +_hwlp_restore_never_used: + /* In both cases, set the HWLP state to clean */ + csrwi CSR_HWLP_STATE_REG, HWLP_CLEAN_STATE + lw ra, (sp) + addi sp, sp, 16 + ret #endif /* SOC_CPU_HAS_HWLOOP */ @@ -192,7 +218,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs, #if SOC_CPU_HAS_PIE /** - * @brief Macros to enable and disable the hardware loop feature on the current core + * @brief Macros to enable and disable the PIE coprocessor on the current core */ .macro pie_enable scratch_reg=a0 li \scratch_reg, 1 @@ -200,7 +226,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs, .endm /** - * @brief Disable HW Loop CPU feature while returning the former status in the given register + * @brief Disable the PIE coprocessor while returning the former status in the given register */ .macro pie_disable reg csrrw \reg, CSR_PIE_STATE_REG, zero @@ -213,7 +239,7 @@ generate_coprocessor_routine hwlp, HWLP_COPROC_IDX, hwlp_enable, hwlp_save_regs, .endm /** - * @brief Macros to save and restore the hardware loop registers to and from the given frame + * @brief Macros to save and restore the PIE coprocessor registers to and from the given frame */ .macro pie_save_regs frame=a0 /* Save the 128-bit Q registers from the frame memory and then frame += 16 */ @@ -427,15 +453,16 @@ rtos_current_tcb: * 16-bit 
instructions. * @returns Context that should be given to `rtos_int_exit`. On targets that have coprocessors, * this value is a bitmap where bit i is 1 if coprocessor i is enable, 0 if it is disabled. + * This routine can use the s registers too since they are not used by the caller (yet) */ .global rtos_int_enter .type rtos_int_enter, @function rtos_int_enter: #if ( configNUM_CORES > 1 ) - csrr a5, mhartid /* a5 = coreID */ - slli a5, a5, 2 /* a5 = coreID * 4 */ + csrr s0, mhartid /* s0 = coreID */ + slli s0, s0, 2 /* s0 = coreID * 4 */ la a0, port_xSchedulerRunning /* a0 = &port_xSchedulerRunning */ - add a0, a0, a5 /* a0 = &port_xSchedulerRunning[coreID] */ + add a0, a0, s0 /* a0 = &port_xSchedulerRunning[coreID] */ lw a0, (a0) /* a0 = port_xSchedulerRunning[coreID] */ #else lw a0, port_xSchedulerRunning /* a0 = port_xSchedulerRunning */ @@ -446,7 +473,7 @@ rtos_int_enter: /* Increment the ISR nesting count */ la a0, port_uxInterruptNesting /* a0 = &port_uxInterruptNesting */ #if ( configNUM_CORES > 1 ) - add a0, a0, a5 /* a0 = &port_uxInterruptNesting[coreID] // a5 already contains coreID * 4 */ + add a0, a0, s0 /* a0 = &port_uxInterruptNesting[coreID] // s0 contains coreID * 4 */ #endif /* ( configNUM_CORES > 1 ) */ lw a1, 0(a0) /* a1 = port_uxInterruptNesting[coreID] */ addi a2, a1, 1 /* a2 = a1 + 1 */ @@ -456,19 +483,13 @@ rtos_int_enter: li a0, 0 /* return 0 in case we are going to branch */ bnez a1, rtos_int_enter_end /* if (port_uxInterruptNesting[coreID] > 0) jump to rtos_int_enter_end */ - li a7, 0 + li s2, 0 #if SOC_CPU_COPROC_NUM > 0 /* Disable the coprocessors to forbid the ISR from using it */ -#if SOC_CPU_HAS_HWLOOP - /* The current HWLP status will be returned in a0 */ - hwlp_disable a0 - or a7, a7, a0 -#endif /* SOC_CPU_HAS_HWLOOP */ - #if SOC_CPU_HAS_PIE - /* The current HWLP status will be returned in a0 */ + /* The current PIE coprocessor status will be returned in a0 */ pie_disable a0 - or a7, a7, a0 + or s2, s2, a0 #endif /* SOC_CPU_HAS_PIE 
*/ #if SOC_CPU_HAS_FPU @@ -485,24 +506,48 @@ rtos_int_enter: /* Save the current sp in pxCurrentTCBs[coreID] and load the ISR stack on to sp */ #if ( configNUM_CORES > 1 ) la a0, pxCurrentTCBs /* a0 = &pxCurrentTCBs */ - add a0, a0, a5 /* a0 = &pxCurrentTCBs[coreID] // a5 already contains coreID * 4 */ + add a0, a0, s0 /* a0 = &pxCurrentTCBs[coreID] // s0 already contains coreID * 4 */ lw a0, (a0) /* a0 = pxCurrentTCBs[coreID] */ sw sp, 0(a0) /* pxCurrentTCBs[coreID] = sp */ - la a0, xIsrStackTop /* a0 = &xIsrStackTop */ - add a0, a0, a5 /* a0 = &xIsrStackTop[coreID] // a5 already contains coreID * 4 */ - lw sp, (a0) /* sp = xIsrStackTop[coreID] */ + /* We may need a0 below to call pxPortGetCoprocArea */ + la a1, xIsrStackTop /* a1 = &xIsrStackTop */ + add a1, a1, s0 /* a1 = &xIsrStackTop[coreID] // s0 already contains coreID * 4 */ + lw sp, (a1) /* sp = xIsrStackTop[coreID] */ #else lw a0, pxCurrentTCBs /* a0 = pxCurrentTCBs */ sw sp, 0(a0) /* pxCurrentTCBs[0] = sp */ lw sp, xIsrStackTop /* sp = xIsrStackTop */ #endif /* ( configNUM_CORES > 1 ) */ +#if SOC_CPU_HAS_HWLOOP + /* Check if the current task used the Hardware loop feature, by reading the state */ + csrr a1, CSR_HWLP_STATE_REG + addi a1, a1, -HWLP_DIRTY_STATE + bnez a1, 1f + /* State is dirty! 
The hardware loop feature was used, save the registers */ + li a1, 1 /* Allocate the save area if not already allocated */ + li a2, HWLP_COPROC_IDX + mv s1, ra + call pxPortGetCoprocArea + mv ra, s1 + /* Set the HWLP enable flag in the coprocessor save area */ + lw a1, RV_COPROC_ENABLE(a0) + ori a1, a1, 1 << HWLP_COPROC_IDX + sw a1, RV_COPROC_ENABLE(a0) + /* Get the area where we need to save the HWLP registers */ + lw a0, RV_COPROC_SA+HWLP_COPROC_IDX*4(a0) /* a0 = RvCoprocSaveArea->sa_coprocs[HWLP_COPROC_IDX] */ + hwlp_save_regs a0 + /* Set the HWLP state to clean so that a dirty state on exit means the ISR used it */ + csrwi CSR_HWLP_STATE_REG, HWLP_CLEAN_STATE +1: +#endif /* SOC_CPU_HAS_HWLOOP */ + #if CONFIG_ESP_SYSTEM_HW_STACK_GUARD /* Prepare the parameters for esp_hw_stack_guard_set_bounds(xIsrStackBottom, xIsrStackTop); */ #if ( configNUM_CORES > 1 ) /* Load the xIsrStack for the current core and set the new bounds */ la a0, xIsrStackBottom - add a0, a0, a5 /* a0 = &xIsrStackBottom[coreID] */ + add a0, a0, s0 /* a0 = &xIsrStackBottom[coreID] */ lw a0, (a0) /* a0 = xIsrStackBottom[coreID] */ #else lw a0, xIsrStackBottom @@ -514,8 +559,8 @@ rtos_int_enter: ESP_HW_STACK_GUARD_MONITOR_START_CUR_CORE a0 a1 #endif /* CONFIG_ESP_SYSTEM_HW_STACK_GUARD */ - /* Return the coprocessor context from a7 */ - mv a0, a7 + /* Return the coprocessor context from s2 */ + mv a0, s2 rtos_int_enter_end: ret @@ -569,11 +614,11 @@ isr_skip_decrement: /* If the CPU reached this label, a2 (uxInterruptNesting) is 0 for sure */ /* Schedule the next task if a yield is pending */ - la s7, xPortSwitchFlag /* a0 = &xPortSwitchFlag */ + la s7, xPortSwitchFlag /* s7 = &xPortSwitchFlag */ #if ( configNUM_CORES > 1 ) - add s7, s7, a1 /* a0 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */ + add s7, s7, a1 /* s7 = &xPortSwitchFlag[coreID] // a1 already contains coreID * 4 */ #endif /* ( configNUM_CORES > 1 ) */ - lw a0, 0(s7) /* a2 = xPortSwitchFlag[coreID] */ + lw a0, 0(s7) /* a0 = xPortSwitchFlag[coreID] */ beqz a0, 
no_switch_restore_coproc /* if (xPortSwitchFlag[coreID] == 0) jump to no_switch_restore_coproc */ /* Preserve return address and schedule next task. To speed up the process, and because this current routine @@ -601,10 +646,19 @@ isr_skip_decrement: mv ra, s10 /* Restore original return address */ beq a0, s9, no_switch_restore_coproc +#if SOC_CPU_HAS_HWLOOP + /* We have to restore the context of the HWLP if the newly scheduled task used it before. In all cases, this + * routine will also clean the state and set it to clean */ + mv s7, ra + /* a0 contains the current TCB address */ + call hwlp_restore_if_used + mv ra, s7 +#endif /* SOC_CPU_HAS_HWLOOP */ + #if SOC_CPU_HAS_FPU /* Disable the FPU in the `mstatus` value to return */ - li a0, ~CSR_MSTATUS_FPU_DISABLE - and s11, s11, a0 + li a1, ~CSR_MSTATUS_FPU_DISABLE + and s11, s11, a1 #endif /* SOC_CPU_HAS_FPU */ j no_switch_restored @@ -614,17 +668,24 @@ no_switch_restore_coproc: /* We reach here either because there is no switch scheduled or because the TCB that is going to be scheduled * is the same as the one that has been interrupted. In both cases, we need to restore the coprocessors status */ #if SOC_CPU_HAS_HWLOOP - andi a0, s8, 1 << HWLP_COPROC_IDX - beqz a0, 1f - hwlp_enable a0 + /* Check if the ISR altered the state of the HWLP */ + csrr a1, CSR_HWLP_STATE_REG + addi a1, a1, -HWLP_DIRTY_STATE + bnez a1, 1f + /* ISR used the HWLP, restore the HWLP context! 
*/ + mv s7, ra + /* a0 contains the current TCB address */ + call hwlp_restore_if_used + mv ra, s7 1: + /* Else, the ISR hasn't touched HWLP registers, we don't need to restore the HWLP registers */ #endif /* SOC_CPU_HAS_HWLOOP */ #if SOC_CPU_HAS_PIE andi a0, s8, 1 << PIE_COPROC_IDX - beqz a0, 1f + beqz a0, 2f pie_enable a0 -1: +2: #endif /* SOC_CPU_HAS_PIE */ no_switch_restored: diff --git a/components/freertos/app_startup.c b/components/freertos/app_startup.c index 7e4439d0b8..c7f8116769 100644 --- a/components/freertos/app_startup.c +++ b/components/freertos/app_startup.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2022-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ diff --git a/components/freertos/test_apps/freertos/port/test_fpu_in_task.c b/components/freertos/test_apps/freertos/port/test_fpu_in_task.c index 8779a28238..fe59eb4bc2 100644 --- a/components/freertos/test_apps/freertos/port/test_fpu_in_task.c +++ b/components/freertos/test_apps/freertos/port/test_fpu_in_task.c @@ -192,7 +192,7 @@ TEST_CASE("FPU: Usage in unpinned task", "[freertos]") typedef struct { bool negative; TaskHandle_t main; -} ParamsFPU; +} fpu_params_t; /** * @brief Function performing some simple calculation using several FPU registers. @@ -200,7 +200,7 @@ typedef struct { */ void fpu_calculation(void* arg) { - ParamsFPU* p = (ParamsFPU*) arg; + fpu_params_t* p = (fpu_params_t*) arg; const bool negative = p->negative; const float init = negative ? 
-1.f : 1.f; float f = init; @@ -236,7 +236,7 @@ TEST_CASE("FPU: Unsolicited context switch between tasks using FPU", "[freertos] /* Create two tasks that are on the same core and use the same FPU */ TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle(); TaskHandle_t tasks[2]; - ParamsFPU params[2] = { + fpu_params_t params[2] = { { .negative = false, .main = unity_task_handle }, { .negative = true, .main = unity_task_handle }, }; diff --git a/components/freertos/test_apps/freertos/port/test_hwlp.c b/components/freertos/test_apps/freertos/port/test_hwlp.c index 6d32263fcf..49347318f5 100644 --- a/components/freertos/test_apps/freertos/port/test_hwlp.c +++ b/components/freertos/test_apps/freertos/port/test_hwlp.c @@ -19,109 +19,50 @@ */ #if SOC_CPU_HAS_HWLOOP -static uint32_t use_hwlp(uint32_t count) -{ - uint32_t ret; - asm volatile( - /* The toolchain doesn't support HWLP instructions yet, manually set it up */ - "la a2, start\n" - "csrw 0x7c6, a2\n" - "la a2, end\n" - "csrw 0x7c7, a2\n" - "csrw 0x7c8, a0\n" - "li a1, 0\n" - /* Hardware loops must have at least 8 32-bit instructions or 16 16-bit instructions */ - "start:\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "addi a1, a1, 1\n" - "end:\n" - "addi a1, a1, 1\n" - "mv %0, a1\n" - "ret\n" - : "=r"(ret) :); - return ret; -} - -static void other_task(void* arg) -{ - const TaskHandle_t main_task = (TaskHandle_t) arg; - - use_hwlp(10); - - xTaskNotifyGive(main_task); - vTaskDelete(NULL); -} +uint32_t use_hwlp(uint32_t count); TEST_CASE("HWLP: Context save does not affect stack watermark", "[freertos]") { - TaskHandle_t pvCreatedTask; /* Force the FreeRTOS port layer to store a HWLP context in the current task. 
* So let's use the it and make sure another task, on the SAME CORE, also uses it */ - const int core_id = xPortGetCoreID(); const TaskHandle_t current_handle = xTaskGetCurrentTaskHandle(); /* Get the current stack watermark */ const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle); - /* Use the HWLP unit, the context will NOT be flushed until another task starts using it */ + /* Use the HWLP unit, the context will NOT be flushed until a context switch is done */ use_hwlp(20); - xTaskCreatePinnedToCore(other_task, - "OtherTask", - 2048, - (void*) current_handle, - CONFIG_UNITY_FREERTOS_PRIORITY - 1, - &pvCreatedTask, - core_id); - + /* Make sure FreeRTOS switches to another task, even Idle task, so that the current Task saves + * the HWLP current context */ vTaskDelay(10); - /* Wait for other task to complete */ - ulTaskNotifyTake(pdTRUE, portMAX_DELAY); - const UBaseType_t after_watermark = uxTaskGetStackHighWaterMark(current_handle); TEST_ASSERT_TRUE(after_watermark > before_watermark / 2); } +#if CONFIG_FREERTOS_NUMBER_OF_CORES > 1 + typedef struct { uint32_t count; TaskHandle_t main; -} ParamsHWLP; +} hwlp_params_t; -void calculation(void* arg) +static void calculation(void* arg) { - ParamsHWLP* p = (ParamsHWLP*) arg; + hwlp_params_t* p = (hwlp_params_t*) arg; const uint32_t count = p->count; uint32_t result = 0; int i = 0; - for (i = 0; i < 10; i++) { + for (i = 0; i < 50000; i++) { uint32_t current = use_hwlp(count); result += current; - /* Give some time to the other to interrupt us before checking `f` value */ - esp_rom_delay_us(1000); - /* Using TEST_ASSERT_TRUE triggers a stack overflow, make sure the count is still correct. 
* The function `use_hwlp` should return (count * 16) */ assert(count * 16 == current); - - /* Give the hand back to FreeRTOS to avoid any watchdog error */ - vTaskDelay(2); } /* Make sure the result is correct */ @@ -131,14 +72,14 @@ void calculation(void* arg) vTaskDelete(NULL); } -TEST_CASE("HWLP: Unsolicited context switch between tasks using the PIE", "[freertos]") +TEST_CASE("HWLP: Unsolicited context switch between tasks using HWLP", "[freertos]") { /* Create two tasks that are on the same core and use the same FPU */ TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle(); TaskHandle_t tasks[2]; - ParamsHWLP params[2] = { - { .count = 10, .main = unity_task_handle }, - { .count = 200, .main = unity_task_handle }, + hwlp_params_t params[2] = { + { .count = 1024, .main = unity_task_handle }, + { .count = 2048, .main = unity_task_handle }, }; xTaskCreatePinnedToCore(calculation, "Task1", 2048, params + 0, CONFIG_UNITY_FREERTOS_PRIORITY + 1, &tasks[0], 1); @@ -148,4 +89,6 @@ TEST_CASE("HWLP: Unsolicited context switch between tasks using the PIE", "[free ulTaskNotifyTake(pdTRUE, portMAX_DELAY); } +#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */ + #endif // SOC_CPU_HAS_HWLOOP diff --git a/components/freertos/test_apps/freertos/port/test_hwlp_routines.S b/components/freertos/test_apps/freertos/port/test_hwlp_routines.S new file mode 100644 index 0000000000..80bd955776 --- /dev/null +++ b/components/freertos/test_apps/freertos/port/test_hwlp_routines.S @@ -0,0 +1,52 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "sdkconfig.h" +#include "soc/soc_caps.h" + +#if SOC_CPU_HAS_HWLOOP + + .text + .align 4 + +/** + * @brief Perform a hardware loop with a given number of iterations + * + * @param a0 Number of iterations + */ + .global use_hwlp + .type use_hwlp, @function +use_hwlp: + /* The toolchain doesn't support HWLP instructions yet, manually set it up */ + la a2, start + csrw 
0x7c6, a2 + la a2, end + csrw 0x7c7, a2 + csrw 0x7c8, a0 + li a1, 0 + /* Hardware loops must have at least 8 32-bit instructions or 16 16-bit instructions */ +start: + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 + addi a1, a1, 1 +end: + addi a1, a1, 1 + mv a0, a1 + ret + .size use_hwlp, .-use_hwlp + +#endif /* SOC_CPU_HAS_HWLOOP */ diff --git a/components/freertos/test_apps/freertos/port/test_pie_in_task.c b/components/freertos/test_apps/freertos/port/test_pie_in_task.c index c5f632ed91..aa9bcf658e 100644 --- a/components/freertos/test_apps/freertos/port/test_pie_in_task.c +++ b/components/freertos/test_apps/freertos/port/test_pie_in_task.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -13,28 +13,26 @@ #include "unity.h" #include "test_utils.h" +/* PIE instructions set is currently only supported in GCC compiler */ #if SOC_CPU_HAS_PIE /** - * @brief Performs the sum of two 4-word vectors using the PIE. + * @brief Performs the signed sum of two 4-word vectors using the PIE. 
* * @param a First vector * @param b Second vector * @param dst Destination to store the sum - * - * @returns a will store a + b */ -static void pie_vector_add(const int32_t a[4], const int32_t b[4], int32_t dst[4]) -{ - asm volatile("esp.vld.128.ip q0, a0, 0\n" - "esp.vld.128.ip q1, a1, 0\n" - "esp.vadd.s32 q2, q0, q1\n" - "esp.vst.128.ip q2, a2, 0\n" - ::); -} +void pie_vector_signed_add(const int32_t a[4], const int32_t b[4], int32_t dst[4]); /* ------------------------------------------------------------------------------------------------------------------ */ +typedef struct { + int32_t cst; + TaskHandle_t main; + SemaphoreHandle_t sem; +} pie_params_t; + /* Test PIE usage from a task context @@ -59,16 +57,22 @@ Expected: static void pinned_task(void *arg) { + pie_params_t *param = (pie_params_t*) arg; ulTaskNotifyTake(pdTRUE, portMAX_DELAY); - int32_t a[4] = { 42, 42, 42, 42}; + int32_t constant = 42 * param->cst; + int32_t a[4] = { constant, constant, constant, constant }; int32_t b[4] = { 10, 20, 30, 40 }; int32_t dst[4] = { 0 }; - pie_vector_add(a, b, dst); + pie_vector_signed_add(a, b, dst); - // Indicate done wand wait to be deleted - xSemaphoreGive((SemaphoreHandle_t)arg); + for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) { + TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]); + } + + // Indicate done and wait to be deleted + xSemaphoreGive((SemaphoreHandle_t)param->sem); vTaskSuspend(NULL); } @@ -79,15 +83,20 @@ TEST_CASE("PIE: Usage in task", "[freertos]") for (int iter = 0; iter < TEST_PINNED_NUM_ITERS; iter++) { TaskHandle_t task_handles[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS]; + pie_params_t params[CONFIG_FREERTOS_NUMBER_OF_CORES][TEST_PINNED_NUM_TASKS]; // Create test tasks for each core for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) { for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { - TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *)done_sem, UNITY_FREERTOS_PRIORITY + 1, 
&task_handles[i][j], i)); + params[i][j] = (pie_params_t) { + .cst = i + j + 1, + .sem = done_sem, + }; + TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *) &params[i][j], UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i)); } } - // Start the created tasks simultaneously + // Start the created tasks for (int i = 0; i < CONFIG_FREERTOS_NUMBER_OF_CORES; i++) { for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { xTaskNotifyGive(task_handles[i][j]); @@ -159,7 +168,7 @@ static void unpinned_task(void *arg) int32_t b[4] = { 111, 222, 333, 444 }; int32_t dst[4] = { 0 }; - pie_vector_add(a, b, dst); + pie_vector_signed_add(a, b, dst); for (int i = 0; i < sizeof(a) / sizeof(uint32_t); i++) { TEST_ASSERT_EQUAL(dst[i], a[i] + b[i]); @@ -196,24 +205,19 @@ TEST_CASE("PIE: Usage in unpinned task", "[freertos]") } } -typedef struct { - int32_t cst; - TaskHandle_t main; -} ParamsPIE; - /** * @brief Function performing some simple calculation using the PIE coprocessor. * The goal is to be preempted by a task that also uses the PIE on the same core. 
*/ -void pie_calculation(void* arg) +static void pie_calculation(void* arg) { - ParamsPIE* p = (ParamsPIE*) arg; + pie_params_t* p = (pie_params_t*) arg; const int32_t cst = p->cst; int32_t a[4] = { cst, cst, cst, cst }; int32_t dst[4] = { 0 }; for (int i = 0; i < 10; i++) { - pie_vector_add(a, dst, dst); + pie_vector_signed_add(a, dst, dst); /* Give some time to the other to interrupt us before checking `f` value */ esp_rom_delay_us(1000); @@ -237,7 +241,7 @@ TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freer /* Create two tasks that are on the same core and use the same FPU */ TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle(); TaskHandle_t tasks[2]; - ParamsPIE params[2] = { + pie_params_t params[2] = { { .cst = 1, .main = unity_task_handle }, { .cst = -1, .main = unity_task_handle }, }; @@ -249,5 +253,6 @@ TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freer ulTaskNotifyTake(pdTRUE, portMAX_DELAY); } -#endif // CONFIG_FREERTOS_NUMBER_OF_CORES > 1 -#endif // SOC_CPU_HAS_PIE +#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */ + +#endif /* SOC_CPU_HAS_PIE */ diff --git a/components/freertos/test_apps/freertos/port/test_pie_routines.S b/components/freertos/test_apps/freertos/port/test_pie_routines.S new file mode 100644 index 0000000000..7de3a4da28 --- /dev/null +++ b/components/freertos/test_apps/freertos/port/test_pie_routines.S @@ -0,0 +1,50 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "sdkconfig.h" +#include "soc/soc_caps.h" + +/* PIE instructions set is currently only supported in GCC compiler */ +#if SOC_CPU_HAS_PIE + + .text + .align 4 + +/** + * @brief Performs the unsigned sum of two 4-word vectors using the PIE. 
+ * + * @param a0 First vector + * @param a1 Second vector + * @param a2 Destination to store the sum + */ + .type pie_vector_unsigned_add, @function + .global pie_vector_unsigned_add +pie_vector_unsigned_add: + esp.vld.128.ip q0, a0, 0 + esp.vld.128.ip q1, a1, 0 + esp.vadd.u32 q2, q0, q1 + esp.vst.128.ip q2, a2, 0 + ret + .size pie_vector_unsigned_add, .-pie_vector_unsigned_add + + +/** + * @brief Performs the signed sum of two 4-word vectors using the PIE. + * + * @param a0 First vector + * @param a1 Second vector + * @param a2 Destination to store the sum + */ + .type pie_vector_signed_add, @function + .global pie_vector_signed_add +pie_vector_signed_add: + esp.vld.128.ip q0, a0, 0 + esp.vld.128.ip q1, a1, 0 + esp.vadd.s32 q2, q0, q1 + esp.vst.128.ip q2, a2, 0 + ret + .size pie_vector_signed_add, .-pie_vector_signed_add + +#endif /* SOC_CPU_HAS_PIE */ diff --git a/components/freertos/test_apps/freertos/port/test_pie_watermark.c b/components/freertos/test_apps/freertos/port/test_pie_watermark.c index 5f5725cf29..075b40b34f 100644 --- a/components/freertos/test_apps/freertos/port/test_pie_watermark.c +++ b/components/freertos/test_apps/freertos/port/test_pie_watermark.c @@ -21,23 +21,24 @@ */ #if SOC_CPU_HAS_PIE -static void use_pie(uint32_t a[4], uint32_t b[4]) -{ - asm volatile("esp.vld.128.ip q0, %0, 0\n" - "esp.vld.128.ip q1, %2, 0\n" - "esp.vadd.u32 q2, q0, q1\n" - "esp.vst.128.ip q2, %0, 0\n" - : "=r"(a) : "r"(a), "r"(b)); -} +/** + * @brief Performs the unsigned sum of two 4-word vectors using the PIE. 
+ * + * @param a First vector + * @param b Second vector + * @param dst Destination to store the sum + */ +void pie_vector_unsigned_add(const uint32_t a[4], const uint32_t b[4], uint32_t dst[4]); static void other_task(void* arg) { uint32_t a[4] = { 1, 2, 3, 4}; uint32_t b[4] = { 42, 43, 44, 45}; + uint32_t dst[4] = { 0 }; const TaskHandle_t main_task = (TaskHandle_t) arg; /* This task must also use the PIE coprocessor to force a PIE context flush on the main task */ - use_pie(a, b); + pie_vector_unsigned_add(a, b, dst); xTaskNotifyGive(main_task); vTaskDelete(NULL); @@ -48,6 +49,7 @@ TEST_CASE("PIE: Context save does not affect stack watermark", "[freertos]") /* Setup some random values */ uint32_t a[4] = { 0x3f00ffff, 0xffe10045, 0xffe10096, 0x42434546}; uint32_t b[4] = { 0x42, 0xbb43, 0x6644, 0x845}; + uint32_t dst[4] = { 0 }; TaskHandle_t pvCreatedTask; /* Force the FreeRTOS port layer to store a PIE context in the current task. @@ -59,7 +61,7 @@ TEST_CASE("PIE: Context save does not affect stack watermark", "[freertos]") const UBaseType_t before_watermark = uxTaskGetStackHighWaterMark(current_handle); /* Use the PIE unit, the context will NOT be flushed until another task starts using it */ - use_pie(a, b); + pie_vector_unsigned_add(a, b, dst); xTaskCreatePinnedToCore(other_task, "OtherTask", diff --git a/components/riscv/include/riscv/csr_hwlp.h b/components/riscv/include/riscv/csr_hwlp.h index 91c946a50a..83a44ea441 100644 --- a/components/riscv/include/riscv/csr_hwlp.h +++ b/components/riscv/include/riscv/csr_hwlp.h @@ -18,6 +18,11 @@ */ #define CSR_HWLP_STATE_REG 0x7F1 +#define HWLP_OFF_STATE 0 +#define HWLP_INITIAL_STATE 1 +#define HWLP_CLEAN_STATE 2 +#define HWLP_DIRTY_STATE 3 + #define CSR_LOOP0_START_ADDR 0x7C6 #define CSR_LOOP0_END_ADDR 0x7C7 #define CSR_LOOP0_COUNT 0x7C8 diff --git a/components/riscv/include/riscv/rv_utils.h b/components/riscv/include/riscv/rv_utils.h index 716fee6e80..c0d3a64b2f 100644 --- a/components/riscv/include/riscv/rv_utils.h 
+++ b/components/riscv/include/riscv/rv_utils.h @@ -14,6 +14,7 @@ #include "esp_attr.h" #include "riscv/csr.h" #include "riscv/interrupt.h" +#include "riscv/csr_pie.h" #ifdef __cplusplus extern "C" { @@ -168,6 +169,27 @@ FORCE_INLINE_ATTR void rv_utils_disable_fpu(void) #endif /* SOC_CPU_HAS_FPU */ +/* ------------------------------------------------- PIE Related ---------------------------------------------------- + * + * ------------------------------------------------------------------------------------------------------------------ */ + +#if SOC_CPU_HAS_PIE + +FORCE_INLINE_ATTR void rv_utils_enable_pie(void) +{ + RV_WRITE_CSR(CSR_PIE_STATE_REG, 1); +} + + +FORCE_INLINE_ATTR void rv_utils_disable_pie(void) +{ + RV_WRITE_CSR(CSR_PIE_STATE_REG, 0); +} + +#endif /* SOC_CPU_HAS_PIE */ + + + /* -------------------------------------------------- Memory Ports ----------------------------------------------------- * * ------------------------------------------------------------------------------------------------------------------ */ diff --git a/components/riscv/vectors.S b/components/riscv/vectors.S index 4a5db19afd..058877f8ed 100644 --- a/components/riscv/vectors.S +++ b/components/riscv/vectors.S @@ -168,17 +168,12 @@ _panic_handler: /* EXT_ILL CSR should contain the reason for the Illegal Instruction */ csrrw a0, EXT_ILL_CSR, zero -#if SOC_CPU_HAS_HWLOOP - /* Check if the HWLOOP bit is set. */ - andi a1, a0, EXT_ILL_RSN_HWLP - bnez a1, rtos_save_hwlp_coproc -#endif // SOC_CPU_HAS_HWLOOP - + /* Hardware loop cannot be treated lazily, so we should never end here if a HWLP instruction is used */ #if SOC_CPU_HAS_PIE - /* Check if the HWLOOP bit is set. */ + /* Check if the PIE bit is set. */ andi a1, a0, EXT_ILL_RSN_PIE bnez a1, rtos_save_pie_coproc -#endif // SOC_CPU_HAS_HWLOOP +#endif /* SOC_CPU_HAS_PIE */ #if SOC_CPU_HAS_FPU /* Check if the FPU bit is set. 
When targets have the FPU reason bug (SOC_CPU_HAS_FPU_EXT_ILL_BUG), diff --git a/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in b/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in index 0471bf13d4..45d2d7dca5 100644 --- a/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in @@ -411,6 +411,10 @@ config SOC_BRANCH_PREDICTOR_SUPPORTED bool default y +config SOC_CPU_COPROC_NUM + int + default 3 + config SOC_CPU_HAS_FPU bool default y @@ -427,10 +431,6 @@ config SOC_CPU_HAS_PIE bool default y -config SOC_CPU_COPROC_NUM - int - default 3 - config SOC_HP_CPU_HAS_MULTIPLE_CORES bool default y diff --git a/components/soc/esp32p4/include/soc/soc_caps.h b/components/soc/esp32p4/include/soc/soc_caps.h index 9ae67872fb..dd217cfe08 100644 --- a/components/soc/esp32p4/include/soc/soc_caps.h +++ b/components/soc/esp32p4/include/soc/soc_caps.h @@ -158,11 +158,15 @@ #define SOC_INT_CLIC_SUPPORTED 1 #define SOC_INT_HW_NESTED_SUPPORTED 1 // Support for hardware interrupts nesting #define SOC_BRANCH_PREDICTOR_SUPPORTED 1 +#define SOC_CPU_COPROC_NUM 3 #define SOC_CPU_HAS_FPU 1 #define SOC_CPU_HAS_FPU_EXT_ILL_BUG 1 // EXT_ILL CSR doesn't support FLW/FSW #define SOC_CPU_HAS_HWLOOP 1 +/* PIE coprocessor assembly is only supported with GCC compiler */ +#ifndef __clang__ #define SOC_CPU_HAS_PIE 1 -#define SOC_CPU_COPROC_NUM 3 +#endif + #define SOC_HP_CPU_HAS_MULTIPLE_CORES 1 // Convenience boolean macro used to determine if a target has multiple cores. #define SOC_CPU_BREAKPOINTS_NUM 3